requests 模块
re模块和requests模块结合示范实例
.*? 不加圆括号:只参与匹配,当正则中另有圆括号分组时,这部分匹配到的内容不会出现在 re.findall 的结果里;
(.*?) 加圆括号(捕获分组):re.findall 只返回括号内匹配到的内容,即前后两个标记之间的部分。
import requests
import re
# Scrape the entries shown on https://ishuo.cn/ and save them, numbered and
# titled, to duanzi.txt in the current working directory.

# A timeout keeps the script from hanging forever on an unresponsive server.
# raise_for_status() aborts before duanzi.txt is opened (and truncated) when
# the server answers with an HTTP error instead of the real page.
response = requests.get('https://ishuo.cn/', timeout=10)
response.raise_for_status()
data = response.text

# With one capturing group in the pattern, re.findall returns only the text
# matched inside the parentheses.
res = re.findall(r'<div class="content">(.*?)</div>', data)
res1 = re.findall(r'</span><a href="/subject/.*?">(.*?)</a>', data)

# Keep every content block except those that start with a <ul> list.
content_list = [i for i in res if not i.startswith('<ul>')]

with open(r'duanzi.txt', 'w', encoding='utf8') as f:
    # zip() stops at the shorter sequence, so a page that yields fewer titles
    # than content blocks no longer raises IndexError.
    # NOTE(review): pairing by position assumes the j-th title belongs to the
    # j-th kept content block -- confirm against the page markup.
    for j, (title, content) in enumerate(zip(res1, content_list)):
        # The with-block flushes and closes the file on exit, so no explicit
        # flush is needed after each write.
        f.write(f'\n{j}>>>{title}:\n{content}\n')