zoukankan      html  css  js  c++  java
  • python爬取小说

    运行结果:

    代码:

     1 import requests
     2 from bs4 import BeautifulSoup
     3 from selenium import webdriver
     4 import os
     5  
     6 class NovelSpider:
     7     def __init__(self):
     8         self.start_url = 'https://www.biqukan.com/1_1680/'
     9  
    10     def get_novel(self):
    11         response = requests.get(self.start_url)
    12         soup = BeautifulSoup(response.text, 'html.parser')
    13         div_chapter = soup.find(class_="listmain")
    14         chapter_list = div_chapter.find_all('a')
    15         chapter_list = chapter_list[12:]
    16         chapter = []
    17         chapter_num = len(chapter_list)
    18         count = 0
    19         print('《凡人修仙传仙界篇》开始下载:')
    20         for cl in chapter_list:
    21             chapter_dict = {}
    22             chapter_name = cl.get_text()
    23             chapter_dict['name'] = chapter_name
    24             chapter_url = cl.get('href')
    25             chapter_dict['value'] = 'https://www.biqukan.com' + chapter_url
    26             if chapter_dict not in chapter:
    27                 chapter.append(chapter_dict)
    28             print(f"已下载:{count}/{chapter_num}")
    29             self.download_novel(chapter_dict)
    30             count += 1
    31  
    32     def parse_novel(self, url):
    33         browser = webdriver.PhantomJS(executable_path=r'F:Spider
    ovelSpiderphantomjs.exe')
    34         browser.get(url)
    35         soup = BeautifulSoup(browser.page_source, 'html.parser')
    36         find_txt = soup.find(class_='showtxt')
    37         # print(type(find_txt.get_text()))
    38         return find_txt.get_text()
    39  
    40     def download_novel(self, data): 
    41         filename = data['name']
    42         url = data['value']
    43         txt = self.parse_novel(url)
    44  
    45         path = r"F:Spider
    ovelSpider"
    46         isExists = os.path.exists(path)
    47         if not isExists:
    48             os.mkdir(path)
    49         else:
    50             pass
    51  
    52         with open(path + f'凡人修仙传仙界篇.txt', 'a', encoding='utf-8') as f:
    53             f.write(f'{filename}
    
    ')
    54             f.write(txt)
    55             f.write('
    ======
    
    ')
    56             f.close()
    57  
    58 if __name__ == '__main__':
    59     ns = NovelSpider()
    60     ns.get_novel()
  • 相关阅读:
    范仁义css3课程---7、文本样式2
    android图片缓存框架Android-Universal-Image-Loader(二)
    Android 开源框架Universal-Image-Loader完全解析(三)---源代码解读
    憨人 音译
    Android 开源框架Universal-Image-Loader完全解析(一)--- 基本介绍及使用
    Android 开源框架Universal-Image-Loader完全解析(二)--- 图片缓存策略详解
    Android开发
    87狂热
    迟志强
    翟惠民
  • 原文地址:https://www.cnblogs.com/huanghuangwei/p/11997460.html
Copyright © 2011-2022 走看看