zoukankan      html  css  js  c++  java
  • 爬虫

    一、完善球赛程序,测试球赛程序,并给出所有函数的测试结果:

    from random import random
    def printIntro():
        """Print the two-line banner describing the simulation and its inputs."""
        banner = (
            "这个程序模拟两个选手A和B的排球竞技比赛",
            "程序运行需要A和B的能力值(以0到1之间的小数表示)",
        )
        for line in banner:
            print(line)
    def getInputs():
        """Prompt on the console for both ability values and the game count.

        Returns:
            tuple: ``(a, b, n)`` where ``a`` and ``b`` are floats (the ability
            values typed for players A and B) and ``n`` is an int (the number
            of games to simulate).

        Raises:
            ValueError: if an entry cannot be parsed as a number.
        """
        # NOTE: the original parsed each entry with eval(), which executes any
        # expression the user types. float()/int() accept the numeric input the
        # prompts ask for and reject everything else with ValueError.
        a = float(input("请输入选手A的能力值(0-1): "))
        b = float(input("请输入选手B的能力值(0-1): "))
        n = int(input("模拟比赛的场次: "))
        return a, b, n
    def simNGames(n, probA, probB):
        """Play ``n`` simulated games and count how many each player wins.

        Each game is produced by ``simOneGame(probA, probB)``; a game counts
        for A only when A's score is strictly higher, otherwise it counts for B.

        Returns:
            tuple: ``(winsA, winsB)``.
        """
        # One boolean per game played: True when A finished ahead.
        a_ahead = [
            pts_a > pts_b
            for pts_a, pts_b in (simOneGame(probA, probB) for _ in range(n))
        ]
        a_victories = sum(a_ahead)
        return a_victories, len(a_ahead) - a_victories
    def gameOver(a,b):
        """Return True when either score equals 15, otherwise False."""
        return 15 in (a, b)
    def simOneGame(probA, probB):
        """Simulate one game rally by rally and return the final scores.

        A serves first. On every rally the current server scores a point with
        its own probability (``probA`` or ``probB``); otherwise no point is
        awarded and the serve passes to the other player. Play stops as soon
        as ``gameOver`` reports the game finished.

        Returns:
            tuple: ``(scoreA, scoreB)``.
        """
        points = {"A": 0, "B": 0}
        win_chance = {"A": probA, "B": probB}
        receiver_of = {"A": "B", "B": "A"}
        server = "A"
        while not gameOver(points["A"], points["B"]):
            if random() < win_chance[server]:
                points[server] += 1
            else:
                # Lost rally: only the serve changes hands.
                server = receiver_of[server]
        return points["A"], points["B"]
    def printSummary(winsA, winsB):
        """Print the number of games simulated and each player's win share.

        Args:
            winsA: games won by player A.
            winsB: games won by player B.

        When no games were played (``winsA + winsB == 0``) both shares are
        reported as 0.0% instead of raising ZeroDivisionError.
        """
        n = winsA + winsB
        # Guard the division: a zero-game run would otherwise crash here.
        shareA = winsA / n if n else 0.0
        shareB = winsB / n if n else 0.0
        print("竞技分析开始,共模拟{}场比赛".format(n))
        print("选手A获胜{}场比赛,占比{:0.1%}".format(winsA, shareA))
        print("选手B获胜{}场比赛,占比{:0.1%}".format(winsB, shareB))
        print("2019310143016  卢佳纯")
    def main():
        """Run the whole program: banner, input, simulation, summary."""
        printIntro()
        ability_a, ability_b, games = getInputs()
        # simNGames returns (winsA, winsB), exactly what printSummary expects.
        printSummary(*simNGames(games, ability_a, ability_b))
    # Print the competition rules, one per line, then start the simulation.
    print(
        "排球竞技比赛赛制规定:",
        "1.前4局比赛采用25分制,每个队只有赢得至少25分,并同时超过对方2分时,才胜1局;",
        "2.正式比赛采用5局3胜制,决胜局的比赛采用15分制,一队先得8分后,两队交换场区,按原位置顺序继续比赛到结束;",
        "3.在决胜局(第五局)之比赛,先获15分并领先对方2分为胜;",
        sep="\n",
    )
    main()
    

    测试函数:

    首先对GameOver(N,scoreA,scoreB)函数进行测试

    再对simOneGame(probA,probB)函数进行测试

    后对simNGames(n,probA,probB)函数进行测试

    def GameOver(N,scoreA,scoreB):
        """Decide whether set number ``N`` is finished at the given score.

        Sets 1-4 are played to 25 points and any later set (the decider) to
        15. In both cases a set is over only when the leading side has reached
        the target and is at least 2 points ahead.

        Args:
            N: set number, starting at 1.
            scoreA: current points of side A.
            scoreB: current points of side B.

        Returns:
            bool: True if the set is over, False otherwise.
        """
        # The original required BOTH sides to have 25 points in sets 1-4, so a
        # normal result such as 25:20 was never recognised as finished. Only
        # the leader has to reach the target.
        target = 25 if N <= 4 else 15
        return max(scoreA, scoreB) >= target and abs(scoreA - scoreB) >= 2
    # Feed a handful of sample scores to GameOver and collect the ones it
    # reports as a finished set.
    ai=[]
    bi=[]
    try:
        # GameOver takes the set number as its first argument. The original
        # call left it out, so every run raised TypeError and only printed
        # 'Error'. Set 1 (a 25-point set) is assumed here because the sample
        # scores are in the 25-point range - adjust if the decider is meant.
        for scoreA,scoreB in ((1,25),(1,26),(25,25),(16,17),(28,30)):
            if GameOver(1,scoreA,scoreB):
                ai.append(scoreA)
                bi.append(scoreB)
    except Exception as exc:
        # Show the cause instead of hiding it behind a bare 'Error'.
        print('Error', exc)

    print(ai)
    print(bi)
    
    # Check of the simOneGame rally logic: play exactly one rally with A
    # serving and print both scores afterwards.
    from random import random
    try:
        probA, probB = 0.5, 0.5
        scoreA = scoreB = 0
        serving = "A"
        # The server wins the rally with its own probability; otherwise the
        # serve passes to the other side and nobody scores.
        if random() < (probA if serving == "A" else probB):
            if serving == "A":
                scoreA += 1
            else:
                scoreB += 1
        else:
            serving = "B" if serving == "A" else "A"
        print(scoreA, scoreB, sep="\n")
    except:
        print('Error')
    
    # Check of the simNGames tallying logic with one fixed game result
    # (A scores 1, B scores 21) instead of a simulated game.
    try:
        n, scoreA, scoreB = 1, 1, 21
        scoreA_ls, scoreB_ls = [], []
        winsA = winsB = 0
        for i in range(n):
            scoreA_ls += [scoreA]
            scoreB_ls += [scoreB]
            # A is credited only with a strictly higher score; B gets the rest.
            winsA, winsB = (
                (winsA + 1, winsB) if scoreA > scoreB else (winsA, winsB + 1)
            )
        print(winsA, winsB)
        print(scoreA_ls, scoreB_ls)
    except:
        print('Error')
    

    二、用requests库的get()函数访问百度网站,打印返回状态和text属性的内容,并计算text和content属性所返回的页面内容的长度:

    import requests
    def getHTMLText(url, attempts=20):
        """Fetch ``url`` ``attempts`` times and describe the last response.

        Args:
            url: address to request with ``requests.get``.
            attempts: how many times to request the page (default 20, the
                count the original hard-coded). Must be at least 1.

        Returns:
            tuple: ``(status_code, text, content, len(text), len(content))``
            for the last response, decoded as UTF-8; or the empty string ``""``
            if any request fails or returns an HTTP error status.

        Raises:
            ValueError: if ``attempts`` is smaller than 1.
        """
        if attempts < 1:
            raise ValueError("attempts must be at least 1")
        try:
            for _ in range(attempts):
                r = requests.get(url, timeout=30)
                # Check every response, not only the last one as before.
                r.raise_for_status()
            r.encoding = 'utf-8'
            return r.status_code,r.text,r.content,len(r.text),len(r.content)
        except requests.RequestException:
            # Only network/HTTP failures map to ""; a bare except would also
            # hide programming errors and Ctrl-C.
            return ""
    url = 'http://www.baidu.com.cn/'
    print(getHTMLText(url))
    

    结果为:

    三、制作一个简单的html页面:a、获取body标签的内容;b、获取id为first的标签对象;c、获取并打印html页面中的中文字符

    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="utf-8">
    <title>菜鸟教程(runoob.com)</title>
    </head>
    <body>

        <b>第一个html页面————来自学号2019310143016</b><br><br>

        <p id="first">制作者的基本信息:</p>

        <!-- The table belongs inside <body>; the original closed </body> before it. -->
        <table border="1">
            <tr>
                <td>班级</td>
                <td>姓名</td>
                <td>年级</td>
            </tr>
            <tr>
                <td>信计1班</td>
                <td>纯牛奶</td>
                <td>19级</td>
            </tr>
        </table>
    </body>
    </html>
    

    四、利用爬虫获取2017中国大学排名:

    import requests
    from bs4 import BeautifulSoup 
    import bs4
    import pandas as pd
    
    # Download the ranking page, collect (rank, school name, total score) from
    # each table row, print the first 50 rows and save all rows to CSV.
    info = []  # scraped rows: [rank, school name, total score]
    url ="http://www.zuihaodaxue.com/Greater_China_ranking2017_0.html"
    try:
        r=requests.get(url,timeout=100)
        r.raise_for_status()
        r.encoding=r.apparent_encoding
        soup  = BeautifulSoup(r.text,"html.parser")
        tbody = soup.find("tbody")
        if tbody is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise ValueError("ranking table (<tbody>) not found in the page")
        for tr in tbody.children:
            # .children also yields whitespace strings between rows; keep tags.
            if isinstance(tr,bs4.element.Tag):
                tds=tr.find_all("td")
                if len(tds) < 4:
                    # Not a data row (columns 0, 1 and 3 are read below).
                    continue
                info.append([tds[0].string,tds[1].string,tds[3].string])
        # chr(12288) is the full-width space, passed as the fill character of
        # the school-name column.
        row_fmt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
        print(row_fmt.format("排名","学校名称","总分",chr(12288)))
        # Slice instead of range(50) so fewer than 50 rows cannot raise
        # IndexError.
        for rank, school, score in info[:50]:
            print(row_fmt.format(rank,school,score,chr(12288)))
        name = ["排名","学校名称","总分"]
        test = pd.DataFrame(columns=name,data=info)
        # The backslashes of this Windows path were lost in the published
        # listing (it read "C:code_python<TAB>est1.csv"); restored here.
        test.to_csv(r"C:\code_python\test1.csv")
        print("保存成功")
    except Exception as e :
        print(e)
    

  • 相关阅读:
    Python装饰器理解(新手)
    vue项目随笔
    ajax 请求数据传到后台为空字符
    关于document.body.scrollTop 的谷歌,火狐浏览器兼容问题
    Nginx 反向代理解决浏览器跨域问题
    SpringBoot maven build a new demo
    UI收集
    git
    编译
    网络2
  • 原文地址:https://www.cnblogs.com/cnn-ljc/p/12905635.html
Copyright © 2011-2022 走看看