zoukankan      html  css  js  c++  java
  • 项目练习 —— 贴吧爬虫

     1 # -*- coding:utf-8 -*-
     2 # Author:Sure Feng
     3 
     4 import requests
     5 
     6 class TiebaSpider(object):
     7     def __init__(self, tieba_name):
     8         self.tieba_name = tieba_name
     9         self.url_temp = "https://tieba.baidu.com/f?kw=" + tieba_name + "&ie=utf-8&pn={}"
    10         self.headers = {
    11             "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"}
    12 
    13 
    14     def get_url_list(self):
    15         """构造URL列表"""
    16         # url_list = []
    17         # for i in range(10):
    18         #     url_list.append(self.url_temp.format(i * 50))
    19         # return url_list
    20         return [self.url_temp.format(i * 50) for i in range(10)]
    21 
    22     def parse_url(self, url):
    23         """遍历列表,发送请求,获取响应内容"""
    24         # print(url)
    25         respond = requests.get(url, headers = self.headers)
    26         return respond.content.decode()
    27 
    28 
    29     def save_html(self, html_str, page_number):
    30         """保存数据"""
    31         file_name = "{} 第{}页" .format(self.tieba_name, page_number)
    32         with open(file_name, "w", encoding="utf-8") as f:
    33             f.write(html_str)
    34 
    35     def run(self):
    36 
    37         # 构造URL列表
    38         url_list = self.get_url_list()
    39         print(url_list)
    40         # 遍历列表,发送请求,获取响应内容
    41         for url in url_list:
    42             html_str = self.parse_url(url)
    43             # 保存数据
    44             page_number = url_list.index(url) + 1
    45             self.save_html(html_str, page_number)
    46 
    47 
    48 if __name__ == "__main__":
    49     tieba_sipder = TiebaSpider("棋魂")
    50     tieba_sipder.run()
  • 相关阅读:
    ege demo
    Easy Graphics Engine vs2015使用
    c++ demo
    leetcode 13 -> Roman to Integer
    leetcode 12 -> Integer to Roman
    12. Integer to Roman
    leetcode 9 -> Palindrome Number
    8. String to Integer (atoi)
    获取字符串中长度最长的回文字符串
    leetcode 5-> Longest Palindromic Substring
  • 原文地址:https://www.cnblogs.com/sure-feng/p/10022836.html
Copyright © 2011-2022 走看看