zoukankan      html  css  js  c++  java
  • python 爬虫获取世界杯比赛赛程

    #!/usr/bin/python
    # -*- coding:utf8 -*-
    
    import requests
    import re
    import os  
    import time 
    # from urllib
    import json
    from bs4 import BeautifulSoup
    from datetime import date
    
    def getTimeExpire(time_play,time_gap):
        """Return ``time_play`` shifted by ``time_gap`` seconds.

        Args:
            time_play: Timestamp string in ``%Y-%m-%d %H:%M:%S`` format,
                interpreted as local time.
            time_gap: Offset in seconds (not milliseconds); a negative value
                moves the timestamp earlier.

        Returns:
            The shifted timestamp in the same format, or ``''`` (after
            printing a failure message) when ``time_play`` cannot be parsed.
        """
        try:
            parsed = time.strptime(time_play, "%Y-%m-%d %H:%M:%S")
        except (TypeError, ValueError):
            # Only parse failures are expected here; a bare ``except`` would
            # also swallow KeyboardInterrupt/SystemExit.
            print('时间转化失败')
            return ''
        # mktime() yields seconds since the epoch, so the gap is added in seconds.
        shifted = time.localtime(time.mktime(parsed) + time_gap)
        return time.strftime('%Y-%m-%d %H:%M:%S', shifted)
    def getHtml(html_path=r'F:\test\python\worldcup.html',
                csv_path=r'F:\test\python\worldcup.csv'):
        """Extract the match schedule from a saved HTML page into a CSV file.

        Reads the page at ``html_path``, walks every day section under
        ``.b-pull-refresh-content`` and appends one row per match to
        ``csv_path`` with the columns team1, team2, time_expire, time_play.
        ``time_expire`` is the kick-off time minus ten minutes.

        Args:
            html_path: Saved schedule page (read as UTF-8).
            csv_path: CSV file to append to; a header line is appended on
                every call, before the rows.

        Raises:
            IndexError: If an expected element is missing from a section.
            OSError: If either file cannot be opened.
        """
        # TODO: fetch the page straight from the website instead; the site
        # paginates, so that needs extra handling.
        # The default paths are raw strings: in a normal literal the "\t" in
        # "F:\test" is a TAB character and the path would be wrong.
        with open(html_path, 'r', encoding='utf-8') as html_file:
            soup = BeautifulSoup(html_file.read(), 'lxml')

        # Open the output once instead of reopening it for every row.
        with open(csv_path, 'a') as csv_file:
            # CSV header. NOTE(review): the trailing space after "time_play"
            # is kept as in the original output - confirm whether consumers
            # rely on it.
            csv_file.write('team1,team2,time_expire,time_play \n')
            for section in soup.select('.b-pull-refresh-content > div'):
                day_title = section.select('.wa-match-schedule-list-title')[0].get_text().strip()
                matches = section.select('.sfc-contacts-list .wa-match-schedule-list-item')
                for match in matches:
                    names = match.select('.wa-tiyu-schedule-item-name.c-line-clamp1')
                    team1 = names[0].get_text().strip()
                    team2 = names[1].get_text().strip()
                    kickoff = match.select('.status-text')[0].get_text().strip()
                    # Characters 2..7 of the title supply the month/day part;
                    # presumably they end with a space so the result parses as
                    # "%Y-%m-%d %H:%M:%S" - TODO confirm against the page markup.
                    time_play = '2018-' + day_title[2:8] + kickoff + ':00'
                    time_expire = getTimeExpire(time_play, -10 * 60)
                    csv_file.write(','.join((team1, team2, time_expire, time_play)) + '\n')
    #getHtml()
    
    
    def getFromAPI(csv_path=r"F:\test\python\worldcupFromAPI.csv"):
        """Fetch upcoming World Cup matches from the Baidu API into a CSV file.

        Queries the API for every second day from 2018-06-13 through
        2018-07-11 (15 requests, one second apart) and appends a row
        ``team1,team2,time_expire,time_play`` for each match whose start time
        is still in the future. ``time_expire`` is the start time minus ten
        minutes.

        Args:
            csv_path: CSV file to append to.

        Raises:
            requests.RequestException: If a request fails or times out.
            ValueError: If a response body is not valid JSON.
            KeyError: If a response lacks an expected field.
        """
        from datetime import date, timedelta

        # The original note states the intended coverage as 2018-06-14 to
        # 07-15; presumably "direction/after" returns the matches following
        # each queried date - TODO confirm against the API.
        first_day = date(2018, 6, 13)

        # Raw-string default path: in a normal literal the "\t" in "F:\test"
        # is a TAB. The file is opened once rather than once per match.
        with open(csv_path, 'a') as csv_file:
            for step in range(15):
                # Real date arithmetic replaces the manual "day > 30" rollover
                # and yields the same sequence of query dates.
                query_day = first_day + timedelta(days=2 * step)
                url = ("http://tiyu.baidu.com/api/match/%E4%B8%96%E7%95%8C%E6%9D%AF/live/date/2018-"
                       + str(query_day.month) + '-' + str(query_day.day)
                       + "/direction/after?from=self")
                time.sleep(1)  # pause between consecutive requests
                data = json.loads(requests.get(url, timeout=3).text)
                if data['status'] != '0':
                    continue
                print('为0')
                # Same fixed-width format as startTime, so a plain string
                # comparison orders the two timestamps chronologically.
                now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                for group in data['data']:
                    for m in group['list']:
                        if m['startTime'] > now:
                            csv_file.write(
                                m['leftLogo']['name'] + ',' + m['rightLogo']['name'] + ','
                                + getTimeExpire(m['startTime'], -10 * 60) + ','
                                + m['startTime'] + '\n'
                            )
    # Script entry point: running this file queries the API and appends the
    # upcoming matches to the CSV file.
    getFromAPI()
    

      

  • 相关阅读:
    c++中,map的使用
    关于C++中vector<vector<int> >的使用
    python中如何向shell脚本传递带空格的参数
    数组初始化及赋值的方法,memset的使用
    leetcode:首个缺少的最小正整数(js实现。)
    安装cmake过程出错:Error when bootstrapping CMake: Cannot find a C++ compiler that supports both C++11 and the specified C++ flags.
    React系列--三大属性 props refs state
    React系列--组件Component
    React系列--jsx语法及虚拟dom,渲染
    c++的复制构造函数,(郑莉 c++语言程序设计)
  • 原文地址:https://www.cnblogs.com/cao-zhen/p/9215222.html
Copyright © 2011-2022 走看看