zoukankan      html  css  js  c++  java
  • requests爬取百度贴吧:python 美女 3

    import requests
    import sys
    
    
    class Tieba(object):
        """Download the first pages of a Baidu Tieba forum and save them as HTML.

        Attributes:
            tieba_name: forum name, used in the URL and in output file names.
            base_url: list URL ending in ``&pn=``; the page offset is appended.
            url_list: one URL per page, with offsets 0, 50, 100, ...
            headers: HTTP headers sent with every request (browser User-Agent).
        """

        def __init__(self, tieba_name, pn):
            """Prepare the page URLs and request headers.

            Args:
                tieba_name: name of the forum to fetch (the ``kw`` query value).
                pn: number of pages to fetch; must be usable with ``range``.
            """
            self.tieba_name = tieba_name
            # The '&' separator is required: without it the offset would be
            # glued onto the forum name instead of being sent as 'pn'.
            self.base_url = 'https://tieba.baidu.com/f?kw=%s&pn=' % (tieba_name,)

            # Each page advances the 'pn' offset by 50
            # (presumably 50 threads per list page -- confirm against the site).
            self.url_list = [self.base_url + str(i * 50) for i in range(pn)]

            self.headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'
            }

        def get_page(self, url, timeout=10):
            """Fetch ``url`` and return the raw response body as bytes.

            Args:
                url: address to request.
                timeout: seconds to wait before requests gives up; prevents
                    an unresponsive server from blocking the run forever.

            Returns:
                The response content (bytes), whatever the HTTP status was.
            """
            response = requests.get(url, headers=self.headers, timeout=timeout)
            return response.content

        def save_content(self, content, index):
            """Write ``content`` to ``<tieba_name>_<index>.html`` in the working directory.

            Args:
                content: bytes to store.
                index: zero-based page number used in the file name.
            """
            filename = self.tieba_name + '_' + str(index) + '.html'
            with open(filename, 'wb') as f:
                f.write(content)

        def run(self):
            """Fetch every URL in ``url_list`` and save each response to its own file."""
            # enumerate yields the page index directly instead of searching
            # the list for each URL.
            for index, url in enumerate(self.url_list):
                content = self.get_page(url)
                self.save_content(content, index)
    
    
    if __name__ == '__main__':
        # Command line: <script> <tieba_name> <page_count>
        # Fail with a usage message instead of a bare IndexError traceback
        # when arguments are missing.
        if len(sys.argv) < 3:
            sys.exit('Usage: python %s <tieba_name> <page_count>' % sys.argv[0])
        name = sys.argv[1]
        try:
            pn = int(sys.argv[2])
        except ValueError:
            sys.exit('page_count must be an integer, got %r' % sys.argv[2])
        tieba = Tieba(name, pn)
        tieba.run()
    

      

    使用方法

    python  代码所在文件名  贴吧名  页数(例如:python  代码所在文件名  美女  3)

  • 相关阅读:
    列表、元组、字典练习
    周总结04
    站立会议07
    人月神话阅读笔记01
    站立会议06
    站立会议05
    站立会议04
    典型用户需求分析第一期
    站立会议03
    站立会议02
  • 原文地址:https://www.cnblogs.com/andy9468/p/8025420.html
Copyright © 2011-2022 走看看