zoukankan      html  css  js  c++  java
  • Python 采集斗图啦(多线程)

    import concurrent
    import requests;
    from concurrent.futures import ThreadPoolExecutor
    import os;
    import parsel;
    
    def send_request(url, timeout=10):
        """Send an HTTP GET to ``url`` with a desktop-browser User-Agent.

        Args:
            url: Absolute URL to fetch.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` waits indefinitely, so a single stalled
                connection would hang the whole crawl.

        Returns:
            The ``requests.Response`` object. The HTTP status code is not
            checked here; callers read ``.text`` / ``.content`` directly.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                when the timeout expires.
        """
        header = {
            "user-agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
        }
        # Suppress urllib3 warnings for the whole process.
        requests.packages.urllib3.disable_warnings()
        response = requests.get(url, headers=header, timeout=timeout)
        return response
    def pare_data(data):
        """Yield ``(file_name, image_url)`` pairs for the images on a list page.

        Args:
            data: HTML text of a listing page.

        Yields:
            Tuples ``(file_name, image_url)`` where ``image_url`` is the
            ``data-original`` attribute of the ``<img>`` inside each matching
            anchor, and ``file_name`` is the image's ``alt`` text followed by
            ``.`` and whatever comes after the last ``.`` in the URL.
            Anchors whose image lacks an ``alt`` or a non-empty
            ``data-original`` attribute are skipped.
        """
        selector = parsel.Selector(data)
        for result in selector.xpath('//a[@class="col-xs-6 col-sm-3"]'):
            title = result.xpath('./img/@alt').get()
            src_url = result.xpath('./img/@data-original').get()

            # .get() returns None when the attribute is absent; concatenating
            # or splitting None would raise and abort the rest of the page.
            if title is None or not src_url:
                continue

            all_title = title + '.' + src_url.split('.')[-1]
            yield all_title, src_url
    
    def save_data(file_name,data):
        """Write ``data`` into the ``doutu_list`` directory and report it.

        Args:
            file_name: Name of the file to create inside ``doutu_list``. Any
                ``/`` or ``\\`` in it is replaced with ``_`` so the name can
                never address another directory.
            data: Bytes to write.

        Raises:
            OSError: If the directory or file cannot be created or written.
        """
        dir_name = 'doutu_list'
        # exist_ok makes creation safe when several threads arrive at once;
        # a separate exists()/mkdir() pair can raise FileExistsError there.
        os.makedirs(dir_name, exist_ok=True)
        # Names come from scraped page text, so neutralize path separators
        # instead of letting them point into a missing or foreign directory.
        safe_name = file_name.replace('/', '_').replace('\\', '_')
        with open(os.path.join(dir_name, safe_name), 'wb') as f:
            f.write(data)
        print("保存完成:", safe_name)
    
    
    def _fetch_and_save(file, url):
        """Download one image from ``url`` and store it as ``file``."""
        image_response = send_request(url)
        save_data(file, image_response.content)


    def main(page, max_workers=1):
        """Crawl listing pages 1 through ``page`` and save every image found.

        Each image is downloaded and written by a task on a thread pool; the
        pool is drained at the end of every listing page before the next page
        is requested.

        Args:
            page: Number of the last listing page to crawl (inclusive).
            max_workers: Size of the thread pool. Values above 1 process
                images in parallel, which is only safe if ``save_data``
                tolerates concurrent calls.

        Raises:
            Whatever ``send_request`` raises while fetching a listing page.
            Failures on individual images are printed and do not stop the
            crawl.
        """
        # One pool for the whole crawl; the with-block guarantees shutdown
        # even if fetching or parsing a listing page raises.
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as thread_pool:
            # A separate loop name keeps the ``page`` argument intact.
            for page_no in range(1, page + 1):
                print('########################当前为第{}页########################'.format(page_no))
                res = send_request('https://www.doutula.com/photo/list/?page={}'.format(page_no))
                pending = {}
                for file, url in pare_data(res.text):
                    print(file)
                    print(url)
                    pending[thread_pool.submit(_fetch_and_save, file, url)] = file
                # Inspect every future: an exception raised inside a worker is
                # otherwise stored on the future and never seen.
                for future in concurrent.futures.as_completed(pending):
                    error = future.exception()
                    if error is not None:
                        print("保存失败:", pending[future], error)
    
    # Script entry point: when run directly, crawl only the first listing page.
    if __name__ == '__main__':
        main(page=1)
    

    高颜值后台管理系统免费使用:子枫后台管理系统,可在宝塔面板直接安装。

    欢迎关注我的公众号:子枫的奇妙世界,获得独家整理的学习资源和日常干货推送。
    如果您对我的其他专题内容感兴趣,直达我的个人博客:www.wangmingchang.com

  • 相关阅读:
    熔断降级(Polly)
    网站被黑
    Redis 缓存穿透
    搭建私有Nuget仓库
    什么是配置
    css3的@media媒体查询
    css设置background图片的位置实现居中
    UTF-8有签名和无签名的区别
    SQL Server表结构和数据导入到MySQL
    Jquery实现滚动到底部加载更多(最原始)
  • 原文地址:https://www.cnblogs.com/wmc1125/p/13399375.html
Copyright © 2011-2022 走看看