运行结果:
![](https://img2018.cnblogs.com/i-beta/1785610/201912/1785610-20191206202406639-2092244326.png)
代码:
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
5
class NovelSpider:
    """Download the chapters of one biqukan.com novel into a single text file.

    The chapter index at ``start_url`` is fetched with ``requests``; every
    chapter page is then rendered with PhantomJS (through selenium) and the
    text of its ``showtxt`` element is appended to ``OUTPUT_NAME`` inside
    ``output_dir``.

    Args:
        start_url: URL of the novel's chapter-index page.
        output_dir: Directory that receives the output file; it is created
            (including missing parents) on first write.
        phantomjs_path: Location of the PhantomJS executable.

    NOTE(review): the two Windows path defaults were reconstructed from a
    listing whose backslashes had been lost -- confirm them before running.
    """

    # Name of the single file all chapters are appended to.
    OUTPUT_NAME = '凡人修仙传仙界篇.txt'
    # Leading index links that are skipped -- presumably the "latest
    # chapters" shortcuts that repeat entries further down; TODO confirm
    # against the live page.
    SKIPPED_LINKS = 12
    # Seconds to wait for the index page before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, start_url='https://www.biqukan.com/1_1680/',
                 output_dir=r'F:\Spider\novelSpider',
                 phantomjs_path=r'F:\Spider\novelSpider\phantomjs.exe'):
        self.start_url = start_url
        self.output_dir = output_dir
        self.phantomjs_path = phantomjs_path

    def get_novel(self):
        """Fetch the chapter index and download each distinct chapter in order.

        Prints a start banner, then one progress line after every chapter
        that has been written.

        Raises:
            requests.HTTPError: If the index page answers with an error status.
            ValueError: If the index page has no ``listmain`` element.
        """
        response = requests.get(self.start_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        chapters = self._unique_chapters(self._chapter_links(response.text))
        total = len(chapters)
        print('《凡人修仙传仙界篇》开始下载:')
        for done, chapter in enumerate(chapters, start=1):
            self.download_novel(chapter)
            # Report only after the chapter is on disk, so the counter
            # reflects finished work and ends at total/total.
            print(f"已下载:{done}/{total}")

    def _chapter_links(self, html):
        """Return ``(title, href)`` pairs for the chapter anchors in *html*."""
        soup = BeautifulSoup(html, 'html.parser')
        listing = soup.find(class_='listmain')
        if listing is None:
            raise ValueError(
                f'no element with class "listmain" found at {self.start_url}')
        anchors = listing.find_all('a')[self.SKIPPED_LINKS:]
        # Anchors without an href cannot be downloaded, so they are dropped.
        return [(a.get_text(), a.get('href')) for a in anchors if a.get('href')]

    def _unique_chapters(self, links):
        """Turn ``(title, href)`` pairs into de-duplicated chapter dicts.

        Each dict has the keys ``'name'`` (chapter title) and ``'value'``
        (absolute chapter URL, resolved against ``start_url``).  The first
        occurrence of a (title, URL) pair wins and input order is kept.
        """
        seen = set()
        chapters = []
        for name, href in links:
            url = urljoin(self.start_url, href)
            key = (name, url)
            if key in seen:
                continue
            seen.add(key)
            chapters.append({'name': name, 'value': url})
        return chapters

    def parse_novel(self, url):
        """Render the chapter page at *url* and return its text.

        Args:
            url: Absolute URL of a chapter page.

        Returns:
            The text content of the page's ``showtxt`` element.

        Raises:
            ValueError: If the rendered page has no ``showtxt`` element.
        """
        # NOTE(review): webdriver.PhantomJS was deprecated in selenium 3.8
        # and removed in selenium 4; this needs an older selenium or a
        # switch to a headless Chrome/Firefox driver.
        browser = webdriver.PhantomJS(executable_path=self.phantomjs_path)
        try:
            browser.get(url)
            page_source = browser.page_source
        finally:
            # One PhantomJS process is started per chapter; always shut it
            # down so the processes do not pile up.
            browser.quit()
        content = BeautifulSoup(page_source, 'html.parser').find(class_='showtxt')
        if content is None:
            raise ValueError(f'no element with class "showtxt" found at {url}')
        return content.get_text()

    def download_novel(self, data):
        """Fetch one chapter and append it to the output file.

        Args:
            data: Chapter dict with the keys ``'name'`` (title written as
                the heading line) and ``'value'`` (chapter URL).

        The chapter is written as the title line, the chapter text and a
        ``======`` separator line, encoded as UTF-8.
        """
        chapter_text = self.parse_novel(data['value'])
        os.makedirs(self.output_dir, exist_ok=True)
        target = os.path.join(self.output_dir, self.OUTPUT_NAME)
        with open(target, 'a', encoding='utf-8') as f:
            f.write(f"{data['name']}\n")
            f.write(chapter_text)
            f.write('\n======\n')
57
if __name__ == '__main__':
    # Script entry point: build the spider and download the whole novel.
    NovelSpider().get_novel()