zoukankan      html  css  js  c++  java
  • 输入关键字从百度贴吧下载相应的图片

    import os
    from urllib.parse import urlsplit

    import requests
    from lxml import etree
    
    class TieBa(object):
        """Download the images posted in a Baidu Tieba forum.

        The forum is selected by keyword: the listing page for the keyword is
        fetched, every thread link found on it is visited, and each image
        matched on those thread pages is written to ``save_dir``.
        """

        def __init__(self, query_string, save_dir="D:/img/", timeout=10):
            """Store the search keyword and request settings.

            Args:
                query_string: Keyword sent as the ``kw`` query parameter.
                save_dir: Folder that receives the downloaded images; it is
                    created on first save if it does not exist.
                timeout: Seconds to wait on each HTTP request before giving up.
            """
            self.query_string = query_string
            self.save_dir = save_dir
            self.timeout = timeout
            self.base_url = 'https://tieba.baidu.com/f'
            self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                            }

        def params(self):
            """Return the query parameters for the forum listing request."""
            return {"kw": self.query_string}

        def send_request(self, url, parms=None):
            """GET ``url`` and return the raw response body as bytes.

            Args:
                url: Address to fetch.
                parms: Optional dict of query parameters. Defaults to None
                    rather than a shared mutable ``{}``.
            """
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url, params=parms, headers=self.headers,
                                    timeout=self.timeout)
            return response.content

        # 2. Convert the raw data and extract the wanted values
        def parse_data(self, data, rule):
            """Parse ``data`` as HTML and return the matches of XPath ``rule``."""
            html_data = etree.HTML(data)
            # Guard in case the parser produced no root element to query.
            if html_data is None:
                return []
            return html_data.xpath(rule)

        @staticmethod
        def _image_name(url):
            """Derive a file name from an image URL.

            Uses the last 12 characters of the final path segment, ignoring any
            query string or fragment, so the result never contains ``/`` or ``?``.
            """
            return urlsplit(url).path.rsplit('/', 1)[-1][-12:]

        # 3. Save the data
        def save_data(self, data, name):
            """Write the bytes ``data`` to the file ``name`` inside ``save_dir``."""
            print(name)
            # Create the target folder on demand instead of requiring the user
            # to prepare it beforehand.
            os.makedirs(self.save_dir, exist_ok=True)
            image_path = os.path.join(self.save_dir, name)
            with open(image_path, 'wb') as f:
                f.write(data)

        # Main entry point
        def run(self):
            """Fetch the forum listing, visit each thread and save its images."""
            tieba_params = self.params()
            datas = self.send_request(self.base_url, tieba_params)

            # XPath for the thread links on the listing page
            detail_rule = '//div[@class="t_con cleafix"]/div/div/div/a/@href'
            url_list = self.parse_data(datas, detail_rule)

            # XPath for the images on a thread page (loop-invariant)
            detail_url = '//img[@class="BDE_Image"]/@src'

            for label in url_list:
                image_url = 'https://tieba.baidu.com' + label
                detail_data = self.send_request(image_url)

                # Extract the image addresses
                image_url_list = self.parse_data(detail_data, detail_url)

                for image_url_1 in image_url_list:
                    image_name = self._image_name(image_url_1)
                    if not image_name:
                        # No usable file name in the URL; nothing to save it as.
                        continue
                    image_data = self.send_request(image_url_1)
                    # Save the image
                    self.save_data(image_data, image_name)
    
    if __name__ == '__main__':
        # Ask for the forum keyword, then download that forum's images.
        # The prompt is two adjacent literals joined by an explicit "\n"; a raw
        # line break inside a single-quoted string is a syntax error.
        keyword = input('请先在D盘创建一个名为img的文件夹来接收图片\n'
                        '接下来请输入你要查询的关键字: ')
        tieba = TieBa(keyword)
        tieba.run()
    

      

  • 相关阅读:
    表变量与临时表
    表单提交原理
    C_Free引用链接库
    禁用ViewState(转)
    jquery设置元素的readonly和disabled
    WebService经验分享
    如何给GridView添加滚动条 收藏
    bcp+Sql语句抽取数据导出
    List<T>的排序
    AppDomain与动态加载程序集
  • 原文地址:https://www.cnblogs.com/wshr210/p/11302299.html
Copyright © 2011-2022 走看看