import encodings
import re
import time

import requests as r
from lxml import etree

# Site root; prepended to every chapter href taken from the index page.
BASE_URL = 'http://www.yuetutu.com'

# Browser-like User-Agent sent with every request (index page and chapters).
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
    )
}

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the scraper indefinitely.
REQUEST_TIMEOUT = 10

# Number of leading links on the index page that are not downloaded.
# NOTE(review): presumably the index repeats the newest chapters ahead of
# the full chapter list -- confirm against the live page.
SKIP_LEADING_LINKS = 8


def pa(url, name):
    """Download one chapter page and append its title and text to a file.

    Args:
        url: Absolute URL of the chapter page.
        name: Path of the text file the chapter is appended to.

    Raises:
        requests.RequestException: If the request fails or times out.
        IndexError: If the chapter title node is not found in the page.
    """
    z = r.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    z.encoding = 'UTF-8'
    html = etree.HTML(z.text)

    # Look up the chapter title.
    zhangjie = html.xpath('//*[@id="wrapper"]/div[3]/div/div[2]/h1/text()')[0]
    print(zhangjie)

    # Use XPath to collect the text nodes of the chapter body.
    content = ' '.join(html.xpath('//*[@id="content"]/text()'))

    # Append mode: successive chapters accumulate in the same file.
    with open(name, 'a', encoding="UTF-8") as txt:
        txt.write(zhangjie + " ")
        txt.write(content)
    print(zhangjie + ": 写入成功")


def main():
    """Fetch the book index, derive the output file name, download chapters."""
    mulu_url = 'http://www.yuetutu.com/cbook_22694/'

    # Send the same headers as the chapter requests; previously the
    # User-Agent string was a stray expression and was never transmitted.
    s = r.get(mulu_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    s.encoding = 'utf-8'
    text = s.text

    # The book title is the content of the first <h1>...</h1> on the page.
    match = re.search(r'<h1>(.*?)</h1>', text)
    if match is None:
        raise SystemExit(f"No <h1> book title found at {mulu_url}")
    name = "./%s.txt" % match.group(1)

    html = etree.HTML(text)
    mulu = html.xpath('//*[@id="list"]/dl/dd/a/@href')
    print(name)
    print(mulu)

    for href in mulu[SKIP_LEADING_LINKS:]:
        pa(BASE_URL + href, name)
        # Pause between requests to avoid hammering the server.
        time.sleep(1)


if __name__ == '__main__':
    main()