zoukankan      html  css  js  c++  java
  • 第一个爬虫和测试

    (1)模拟乒乓球赛函数测试结果:

    单打:

    from random import random
    def printIntro():
        """Print the two-line banner describing the simulation and its inputs."""
        banner = (
            "这个程序模拟两个选手A和B的某种比赛",
            "程序运行需要A和B的能力值(0到1之间)",
        )
        # One print call; sep="\n" puts each banner entry on its own line.
        print(*banner, sep="\n")
    def getInputs():
        """Prompt for the ability values of players A and B.

        Returns:
            A tuple ``(a, b)`` of floats parsed from the two lines typed by
            the user.

        Raises:
            ValueError: if either line is not a valid number.
        """
        # float() replaces the previous eval(): eval would execute any
        # expression the user typed, which is unsafe for interactive input.
        a = float(input("请输入选手A的能力值(0-1):"))
        b = float(input("请输入选手B的能力值(0-1):"))
        return a, b
    def simNGames(probA,probB):
        """Simulate one best-of-seven match and return the games won by each side.

        The match stops as soon as either player has won a majority of the
        seven games (four). The previous version only stopped at five wins,
        so it kept playing games after the match was already decided.

        Args:
            probA: probability that A wins a rally while serving.
            probB: probability that B wins a rally while serving.

        Returns:
            A tuple ``(winsA, winsB)``.
        """
        maxGames = 7
        winsNeeded = maxGames // 2 + 1  # majority of seven games: 4
        winsA, winsB = 0, 0
        # Every game adds one win to exactly one side, so this loop runs at
        # most maxGames times.
        while winsA < winsNeeded and winsB < winsNeeded:
            scoreA, scoreB = simOneGame(probA, probB)
            if scoreA > scoreB:
                winsA += 1
            else:
                winsB += 1
        return winsA, winsB
    def gameOver(a,b):
        """Return True when a table-tennis game with scores ``a`` and ``b`` is over.

        A game is won by the first side to reach 11 points, except that from
        10-all onward play continues until one side leads by two points.
        The previous check (``a==11 or b==11``) ended the game at 11-10.
        """
        return max(a, b) >= 11 and abs(a - b) >= 2
    def simOneGame(probA,probB):
        """Play a single game rally by rally and return ``(scoreA, scoreB)``.

        A serves first. On each rally one random number is drawn: if it is
        below the server's probability the server scores a point, otherwise
        the serve passes to the other player and nobody scores. Play stops
        when ``gameOver`` reports that the game is finished.
        """
        points = {'A': 0, 'B': 0}
        rallyProb = {'A': probA, 'B': probB}
        receiverOf = {'A': 'B', 'B': 'A'}
        server = 'A'
        while True:
            if gameOver(points['A'], points['B']):
                break
            if random() < rallyProb[server]:
                points[server] += 1
            else:
                # Server lost the rally: hand the serve over, no point awarded.
                server = receiverOf[server]
        return points['A'], points['B']
    def printSummary(winsA,winsB):
        """Print the number of games simulated and each player's win share.

        Args:
            winsA: games won by player A.
            winsB: games won by player B.

        When no games were played (both counts are 0) the shares are reported
        as 0.0% instead of raising ZeroDivisionError.
        """
        n = winsA + winsB
        print("竞技分析开始,共模拟{}场比赛".format(n))
        for name, wins in (("A", winsA), ("B", winsB)):
            share = wins / n if n else 0.0
            print("选手{}获胜{}场比赛,占比{:.1%}".format(name, wins, share))
    # Driver: run one singles match with fixed, equal ability values and
    # print the summary followed by the author's screenshot caption.
    try:
        printIntro()
        probA,probB = 0.5,0.5
        winsA,winsB = simNGames(probA,probB)
        printSummary(winsA,winsB)
        print("043林树锋的结果截图")
    except Exception:
        # Catch ordinary errors only; a bare "except:" would also swallow
        # KeyboardInterrupt and SystemExit.
        print('Error')

    测试结果:

    双打:

    from random import random
    def printIntro():
        """Print the two-line banner describing the simulation and its inputs."""
        for line in (
            "这个程序模拟两个选手A和B的某种比赛",
            "程序运行需要A和B的能力值(0到1之间)",
        ):
            print(line)
    def getInputs():
        """Prompt for the ability values of players A and B.

        Returns:
            A tuple ``(a, b)`` of floats parsed from the two lines typed by
            the user.

        Raises:
            ValueError: if either line is not a valid number.
        """
        # float() replaces the previous eval(): eval would execute any
        # expression the user typed, which is unsafe for interactive input.
        a = float(input("请输入选手A的能力值(0-1):"))
        b = float(input("请输入选手B的能力值(0-1):"))
        return a, b
    def simNGames(probA,probB):
        """Simulate one best-of-five match and return ``(winsA, winsB)``.

        Games are played one at a time via ``simOneGame``; the match ends as
        soon as either side has collected three game wins, which always
        happens within five games.
        """
        winsNeeded = 3
        tally = [0, 0]  # [games won by A, games won by B]
        while max(tally) < winsNeeded:
            scoreA, scoreB = simOneGame(probA, probB)
            # Index 0 when A took the game, otherwise index 1 for B.
            tally[0 if scoreA > scoreB else 1] += 1
        return tally[0], tally[1]
    def gameOver(a,b):
        """Return True when a table-tennis game with scores ``a`` and ``b`` is over.

        A game is won by the first side to reach 11 points, except that from
        10-all onward play continues until one side leads by two points.
        The previous check (``a==11 or b==11``) ended the game at 11-10.
        """
        leader, trailer = max(a, b), min(a, b)
        return leader >= 11 and leader - trailer >= 2
    def simOneGame(probA,probB):
        """Play a single game rally by rally and return ``(scoreA, scoreB)``.

        A serves first. On each rally one random number is drawn: if it is
        below the server's probability the server scores a point, otherwise
        the serve passes to the other player and nobody scores. Play stops
        when ``gameOver`` reports that the game is finished.
        """
        scoreA = scoreB = 0
        aIsServing = True
        while not gameOver(scoreA, scoreB):
            serverWins = random() < (probA if aIsServing else probB)
            if not serverWins:
                # Lost rally: the serve changes hands and no point is scored.
                aIsServing = not aIsServing
            elif aIsServing:
                scoreA += 1
            else:
                scoreB += 1
        return scoreA, scoreB
    def printSummary(winsA,winsB):
        """Print the number of games simulated and each player's win share.

        Args:
            winsA: games won by player A.
            winsB: games won by player B.

        When no games were played (both counts are 0) the shares are reported
        as 0.0% instead of raising ZeroDivisionError.
        """
        n = winsA + winsB
        # Guard the division: with n == 0 there is no meaningful ratio.
        shareA = winsA / n if n else 0.0
        shareB = winsB / n if n else 0.0
        print("竞技分析开始,共模拟{}场比赛".format(n))
        print("选手A获胜{}场比赛,占比{:.1%}".format(winsA, shareA))
        print("选手B获胜{}场比赛,占比{:.1%}".format(winsB, shareB))
    # Driver: run one doubles match with fixed, equal ability values and
    # print the summary followed by the author's screenshot caption.
    try:
        printIntro()
        probA,probB = 0.5,0.5
        winsA,winsB = simNGames(probA,probB)
        printSummary(winsA,winsB)
        print("043林树锋的结果截图")
    except Exception:
        # Catch ordinary errors only; a bare "except:" would also swallow
        # KeyboardInterrupt and SystemExit.
        print('Error')

    测试结果:

    (2)用 requests 库的 get() 函数访问搜狗 20 次,打印返回状态和 text 属性的内容,并计算 text 属性和 content 属性所返回的网页内容长度。

    import requests
    # Visit the Sogou home page 20 times and report the HTTP status of each visit.
    for i in range(0,20):
        # requests has no default timeout; without one a stalled connection
        # would block the script indefinitely.
        r=requests.get("https://www.sogou.com/", timeout=30)
        print(r.status_code)
    # Details of the last response: decoded body, its type, and the lengths of
    # the decoded text (characters) and the raw content (bytes).
    print(r.text)
    print(type(r.text))
    print(len(r.text))
    print(len(r.content))

     (3)完成简单html页面计算:

    from bs4 import BeautifulSoup
    import re
    html="""
    <!DOCTYPE html>
    <head>
    <meta charset="utf-8">
    <title>菜鸟教程(runoob.com)</title>
    </head>
    <body>
        <h1>我的第一个标题</h1>
        <p id="first">我的第一个段落</p >
    </body>
            <table border="1">
        <tr>
            <td>row 1, cell 1</td>
            <td>row 1, cell 2</td>
        </tr>
        <tr>
            <td>row 2, cell 1</td>
            <td>row 2, cell 2</td>
        </tr>
    </table>
    </html>
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    abc= BeautifulSoup(html, "html.parser")
    # <head> and <body> each hold several children, so their .string is None;
    # get_text() collects the text of all descendants instead.
    # Print the head text followed by the student number on a new line.
    print(abc.head.get_text(strip=True)+'\n'+'43')
    # Print the text inside the body tag.
    print(abc.body.get_text())
    print(abc.find_all(id="first"))
    r=abc.text
    # Runs of CJK Unified Ideographs, i.e. the Chinese text in the page.
    # The earlier pattern had lost its \u escapes and matched ASCII instead.
    zhongwen = re.findall(r'[\u4e00-\u9fff]+',r)
    print(zhongwen)

    (4)爬取中国大学排名网站内容并转换为csv文件:

    import csv
    import os
    import requests
    import pandas
    from bs4 import BeautifulSoup

    # Rows scraped from the ranking table; each entry is the list of cell
    # strings for one university.
    allUniv = []


    def getHTMLText(url):
        """Download ``url`` and return its body decoded as UTF-8.

        Returns an empty string when the request fails or the server answers
        with an HTTP error status.
        """
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            r.encoding = 'utf-8'
            return r.text
        except requests.RequestException:
            # Only network/HTTP failures are treated as "no page"; a bare
            # except would also hide programming errors and Ctrl-C.
            return ""


    def fillUnivList(soup):
        """Append one list of cell strings to ``allUniv`` per table row in ``soup``.

        Rows without any ``<td>`` cell (for example header rows) are skipped.
        """
        for tr in soup.find_all('tr'):
            ltd = tr.find_all('td')
            if not ltd:
                continue
            allUniv.append([td.string for td in ltd])


    def writercsv(save_road,num,title):
        """Write up to ``num`` rows of ``allUniv`` to the CSV file ``save_road``.

        If the file already exists the rows are appended; otherwise the file
        is created and the ``title`` header row is written first. When fewer
        than ``num`` rows were scraped, only the available rows are written
        (the previous index loop raised IndexError in that case).
        """
        isNewFile = not os.path.isfile(save_road)
        with open(save_road, 'w' if isNewFile else 'a', newline='') as f:
            csv_write = csv.writer(f, dialect='excel')
            if isNewFile:
                csv_write.writerow(title)
            csv_write.writerows(allUniv[:max(num, 0)])


    title=["排名","学校名称","省市","总分","生源质量","培养结果","科研规模","科研质量","顶尖成果","顶尖人才","科技服务","产学研究合作","成果转化"]
    # Raw string: in a normal literal "\U" starts a Unicode escape and the
    # path would not even compile.
    save_road=r"C:\Users\Benny\Desktop\Python\Python练习sqlit_test02.csv"


    def main():
        """Fetch the 2016 ranking page, parse its table and save the first 100 rows."""
        url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
        html = getHTMLText(url)
        soup = BeautifulSoup(html, "html.parser")
        fillUnivList(soup)
        writercsv(save_road,100,title)


    main()
  • 相关阅读:
    记录心得-IntelliJ iDea 创建一个maven管理的的javaweb项目
    记录心得-FastJson分层解析demo示例
    11.05Mybatis注解
    11.03Mybatis标签
    11.04Mybatis resultMap元素
    11.02Mybatis Mapper映射器
    11.02Mybatis SQL执行方式
    10.30Mybatis配置文件及其元素
    10.30Mybatis三要素
    10.29第一个Mybatis程序
  • 原文地址:https://www.cnblogs.com/2640335699qqcom/p/12885262.html
Copyright © 2011-2022 走看看