球赛测试:
# -*- coding: utf-8 -*- """ Created on Thu May 23 00:41:48 2019 @author: h2446 """ from random import random def printIntro(): print("这个程序模拟两支排球队A和B的排球比赛") print("程序运行需要A和B的能力值(以0到1之间的小数表示)") def getInputs(): a=eval(input("请输入队伍A的能力值(0~1):")) b=eval(input("请输入队伍B的能力值(0~1):")) n=eval(input("模拟比赛的场次:")) return a,b,n def simNGames(n,probA,probB): winsA,winsB=0,0 for i in range(n): scoreA,scoreB=simOneGame(probA,probB) if scoreA>scoreB: winsA +=1 else: winsB +=1 return winsA,winsB def gameOver(a,b): if (a>25 and abs(a-b)>=2 )or(b>25 and abs(a-b)>=2): return True else: return False def simOneGame(probA,probB): scoreA,scoreB=0,0 serving = "A" while not gameOver(scoreA,scoreB): if serving =="A": if random()<probA: scoreA +=1 else: serving="B" else: if random()<probB: scoreB +=1 else: serving="A" return scoreA,scoreB def final(probA,probB): winsA,winsB=simNGames1(4,probA,probB) printSummary(winsA,winsB) if not winsA==3 or winsB==3: if winsA==winsB==2: winsA1,winsB1=simOneGame1(probA,probB) finalprintSummary(winsA,winsB) else: finalprintSummary(winsA,winsB) def simNGames1(n,probA,probB): winsA,winsB=0,0 for i in range(n): scoreA,scoreB=simOneGame2(probA,probB) if winsA==3 or winsB==3: break if scoreA>scoreB: winsA+=1 else: winsB+=1 return winsA,winsB def simOneGame2(probA,probB): scoreA,scoreB=0,0 serving="A" while not GG(scoreA,scoreB): if serving=="A": if random() < probA: scoreA += 1 else: serving="B" else: if random() < probB: scoreB += 1 else: serving="A" return scoreA,scoreB def simOneGame1(probA,probB): scoreA,scoreB=0,0 serving="A" while not finalGameOver(scoreA,scoreB): if serving=="A": if random() < probA: scoreA += 1 else: serving="B" else: if random() < probB: scoreB += 1 else: serving="A" return scoreA,scoreB def GG(a,b): return a==3 or b==3 def finalGameOver(a,b): if (a==8 or b==8): if a>b: print("A队获得8分,双方交换场地") else: print("B队获得8分,双方交换场地") if (a>15 and abs(a-b)>=2 )or(b>15 and abs(a-b)>=2): return True else: return False def finalprintSummary(winsA,winsB): n=winsA+winsB if n>=4: print("进行最终决赛") if winsA>winsB: print("最终决赛由A获胜") else: print("最终决赛由B获胜") else: if winsA>winsB: print("最终决赛由A获胜") else: print("最终决赛由B获胜") def printSummary(winsA,winsB): n=winsA+winsB print("竞技分析开始,共模拟{}场比赛".format(n)) print("选手A获胜{}场比赛,占比{:0.1%}".format(winsA,winsA/n)) print("选手B获胜{}场比赛,占比{:0.1%}".format(winsB,winsB/n)) def main(): printIntro() probA,probB,n=getInputs() winsA,winsB=simNGames(n,probA,probB) printSummary(winsA,winsB) final(probA,probB) try: main() except: print("Error")
测试结果:
使用requests库中的get()函数访问如下一个网站20次(这里采用必应网为例子),打印返回状态,text内容,计算text()属性和content属性所返回网页内容的长度。
import requests from bs4 import BeautifulSoup def getHTMLText(r): x = r.status_code if x == 200 : print("访问成功") if x == 404: print("访问失败") return '' r = requests.get("https://cn.bing.com/",timeout = 100) for i in range(1,21): print("第{}次".format(i)) getHTMLText(r) url="https://cn.bing.com/" r=requests.get(url) print(r.status_code) r.encoding='UTF-8' print(r.text)
print("text属性长度{}".format(len(r.text)))
print("content属性长度{}".format(len(r.content)))
返回状态:
text部分内容:
text()属性和content属性所返回网页内容的长度:
html页面的简单相关计算
先将下面的代码保存到电脑上,并使用浏览器打开
<!DOCTYPE html> <html> <head> <meta charset="utf-8"> <title>菜鸟教程(runoob.com)</title> </head> <body> <h1>我的第一个标题</h1> <p id=first>我的第一个段落。</p > </body> <table border="1"> <tr> <td>班级</td> <td>2班</td> </tr> <tr> <td>我的学号</td> <td>3109</td> </tr> </table> </html>
import requests from bs4 import BeautifulSoup f = open("C:/Users/86183/Desktop/.html",'r',encoding="utf-8") r = f.read() f.close() soup = BeautifulSoup(r,"html.parser") print("(1)head标签: ",soup.head) print("(2)body标签内容: ",soup.title) print("(3)id为first的标签对象: ",soup.find(id='first')) print("(4)获取该html中的中文字符:",soup.find('h1').string, soup.find(id='first').string)
运行截图如下:
爬取中国大学网站的相关内容,并保存为CSV格式,代码如下
import requests from bs4 import BeautifulSoup def GetHTMLText(url): try : r = requests.get(url,timeout = 30) r.raise_for_status() r.encoding = "utf-8" return r.text except: return "" def FindUnivList(soup): data = soup.find_all('tr') lth = data[0].find_all('th') #通过查看网页html代码,找到第一个'tr'标签下的'th'标签就是表格标题。 thead = [] for th in lth[:4]: #前四个表格标题(第五个特殊,下面处理) thead.append(th.string) tOptions = data[0].find_all('option') #处理第五个特殊的表格标题 for tOption in tOptions: thead.append(tOption.string) allUniv.append(thead) for tr in data: #获取数据内容 ltd = tr.find_all('td') if len(ltd) == 0: continue tbodies = [] for td in ltd: if td.string is None: #对无数据内容处理 tbodies.append('') continue tbodies.append(td.string) allUniv.append(tbodies) def main(): url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html" html = GetHTMLText(url) soup = BeautifulSoup(html,"html.parser") FindUnivList(soup) #储存为csv f = open('2019中国大学排名.csv','w',encoding='utf-8') for row in allUniv: f.write(','.join(row)+' ') f.close() allUniv = [] main()
截图如下: