zoukankan html css js c++ java

Python 爬取小说

运行结果：

代码：

 1 import requests
 2 from bs4 import BeautifulSoup
 3 from selenium import webdriver
 4 import os
 5  
 6 class NovelSpider:
 7     def __init__(self):
 8         self.start_url = 'https://www.biqukan.com/1_1680/'
 9  
10     def get_novel(self):
11         response = requests.get(self.start_url)
12         soup = BeautifulSoup(response.text, 'html.parser')
13         div_chapter = soup.find(class_="listmain")
14         chapter_list = div_chapter.find_all('a')
15         chapter_list = chapter_list[12:]
16         chapter = []
17         chapter_num = len(chapter_list)
18         count = 0
19         print('《凡人修仙传仙界篇》开始下载:')
20         for cl in chapter_list:
21             chapter_dict = {}
22             chapter_name = cl.get_text()
23             chapter_dict['name'] = chapter_name
24             chapter_url = cl.get('href')
25             chapter_dict['value'] = 'https://www.biqukan.com' + chapter_url
26             if chapter_dict not in chapter:
27                 chapter.append(chapter_dict)
28             print(f"已下载:{count}/{chapter_num}")
29             self.download_novel(chapter_dict)
30             count += 1
31  
32     def parse_novel(self, url):
33         browser = webdriver.PhantomJS(executable_path=r'F:Spider
ovelSpiderphantomjs.exe')
34         browser.get(url)
35         soup = BeautifulSoup(browser.page_source, 'html.parser')
36         find_txt = soup.find(class_='showtxt')
37         # print(type(find_txt.get_text()))
38         return find_txt.get_text()
39  
40     def download_novel(self, data): 
41         filename = data['name']
42         url = data['value']
43         txt = self.parse_novel(url)
44  
45         path = r"F:Spider
ovelSpider"
46         isExists = os.path.exists(path)
47         if not isExists:
48             os.mkdir(path)
49         else:
50             pass
51  
52         with open(path + f'凡人修仙传仙界篇.txt', 'a', encoding='utf-8') as f:
53             f.write(f'{filename}

')
54             f.write(txt)
55             f.write('
======

')
56             f.close()
57  
if __name__ == '__main__':
    # Script entry point: build the spider and download the whole novel.
    NovelSpider().get_novel()

查看全文

相关阅读:
axis2的wsdl无法使用eclipse axis1插件来生成client--解决方法
 引用的存在价值
 阿里亲心小号实測
 UVA 1328
XMPP 协议工作流程具体解释
 10g异机恢复后EM无法启动故障处理一例
 JVM 内存
 abstract class和interface有什么区别?
ArrayList 如何增加大小
 IndexOutOfBoundsException ArrayList 访问越界

原文地址：https://www.cnblogs.com/huanghuangwei/p/11997460.html