基本操作,不再详述
直接贴源码(根据当前时间创建文件):
import os
import time

import requests
from bs4 import BeautifulSoup

# Directory that receives the timestamped output files. A raw string keeps
# the Windows backslashes literal; it must not end in a backslash, because
# a trailing ``\'`` would escape the closing quote.
OUTPUT_DIR = r'D:\python\python_code'

# Weibo hot-search summary page and the site root used to absolutise links.
HOT_SEARCH_URL = 'https://s.weibo.com/top/summary'
SITE_ROOT = "https://s.weibo.com"


def input_to_file(number, time, str1, directory=OUTPUT_DIR):
    """Append one numbered record to ``<directory>/<time>.txt``.

    Args:
        number: Sequence number written at the start of the record.
        time: Timestamp string used as the file's base name. Inside this
            function the name shadows the ``time`` module; it is kept for
            backward compatibility with existing callers.
        str1: Text of the record (topic title plus link).
        directory: Folder that holds the output file. Defaults to
            ``OUTPUT_DIR``.

    Raises:
        OSError: If the file cannot be opened, e.g. the directory is missing.
    """
    path = os.path.join(directory, time + '.txt')
    # The ``with`` statement closes the file; no explicit close() is needed.
    with open(path, "a", encoding="utf-8") as f:
        # End every record with a newline so each topic sits on its own line
        # instead of all records running together.
        f.write(str(number) + " " + str1 + "\n")


def get_topic():
    """Fetch the Weibo hot-search page and return its topics with links.

    Returns:
        A list of strings of the form ``"<topic> <absolute link>"``, one per
        ``<td class="td-02">`` cell that contains an anchor with an ``href``.

    Raises:
        requests.RequestException: On connection problems, timeout, or a
            non-success HTTP status.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36',
        'Host': 's.weibo.com',
    }
    request = requests.get(HOT_SEARCH_URL, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    request.raise_for_status()

    soup = BeautifulSoup(request.text, "lxml")
    topic_list = []
    for each in soup.find_all('td', class_='td-02'):
        print("开始获取链接")
        a_object = each.a
        if a_object is None:
            # Cell without an anchor: nothing to record.
            continue
        href = a_object.get('href')
        if href is None:
            # Anchor without a target cannot be turned into a link.
            continue
        print("开始获取热点内容")
        topic = a_object.text.strip()
        topic_list.append(topic + " " + SITE_ROOT + href)
    return topic_list


def main():
    """Scrape the hot-search list and write it to a file named by the current time."""
    print("开始获取网址内容")
    topics = get_topic()
    print("开始写入文件")
    time_now = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(time.time()))
    print("the time now is:", time_now)
    for index, each in enumerate(topics, start=1):
        input_to_file(index, time_now, each)
    print("所有的热点信息以及链接已经写入文件")


if __name__ == "__main__":
    main()
希望对大家有所帮助
以上