首先新建一个Lei.txt
内容为:
CloudComputing
ParentBlockchainTechnology
Enterprise
DotNET
Java
WebDevelop
VC
VB
Delphi
BCB
Cpp
OtherLanguage
MSSQL
PowerBuilder
Oracle
Linux
Windows
Embedded
GameDevelop
Network_communication
Other
Network_communication
OtherTechnicalForum
AI
爬虫代码如下:
import requests
from bs4 import BeautifulSoup
import io
import re

# Default target: the CSDN "Mobile" forum; write_all() overrides this per forum name.
url = "https://bbs.csdn.net/forums/Mobile?page="

# Promotional / sticky thread titles to strip out of the scraped text.
# NOTE: these must stay byte-identical to the site's titles for the filter to match.
_AD_TITLES = (
    "【CSDN 20周年】8场大牛直播+周年T恤免费领",
    "小白学习笔记干货,记得点赞哦!",
    "有奖征集话题: 区块链大火,对于区块链开发零基础的我来说,要怎么入门呢?",
)


def Content(url):
    """Fetch one forum page, extract thread titles, and append them to E://luntan.txt.

    Titles are the text of <a class="forums_title"> elements, joined by single
    spaces. Known promotional titles are removed. Network/parse failures are
    reported but never raised, so a caller's loop can keep going.
    """
    try:
        kv = {'user-agent': 'Mozilla/5.0'}
        # timeout so a stalled connection cannot hang the whole crawl;
        # raise_for_status turns 4xx/5xx into an exception handled below.
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        soup = BeautifulSoup(r.text, "html.parser")
        parts = []
        for a in soup.find_all("a", class_="forums_title"):
            title = str(a.string)
            for ad in _AD_TITLES:
                title = title.replace(ad, "")
            parts.append(title)
        # join + trailing space reproduces the original "title title ... " layout
        text = (" ".join(parts) + " ").lstrip()
        print(text)
        write(text)
    except requests.RequestException:
        # Narrowed from a bare except: only network/HTTP errors mean "no data".
        print("没有数据了!")


def write(contents):
    """Append *contents* to E://luntan.txt (UTF-8), creating it if needed."""
    # Context manager guarantees the handle is closed even if write() raises.
    with open('E://luntan.txt', 'a+', encoding='utf-8') as f:
        f.write(contents)
    print('写入成功!')


def write_all():
    """Read forum names from E://Lei.txt and scrape pages 1-99 of each forum."""
    try:
        with open('E://Lei.txt', 'r+', encoding='utf-8') as f:
            for line in f:
                # BUG FIX: the original rstrip(" ") left the trailing "\n" in the
                # forum name, embedding a newline in every URL. strip() removes it.
                name = line.strip()
                if not name:
                    continue  # skip blank lines in the forum list
                for i in range(1, 100):
                    Content("https://bbs.csdn.net/forums/" + name + "?page=" + str(i))
    except OSError:
        # Narrowed from a bare except: file errors (e.g. missing Lei.txt).
        print("超出页数!")


if __name__ == "__main__":
    write_all()