  • Python crawler for downloading wallpaper images

    If you have any questions, feel free to leave a comment so we can discuss and learn from each other. The script below uses requests and BeautifulSoup to crawl the 1366x768 scenery wallpaper listings on desk.zol.com.cn and saves each wallpaper set into its own folder under 壁纸/.

    #!/usr/bin/env python
    # -*- coding:utf-8 -*- 
    #Author: ss
    
    from bs4 import BeautifulSoup
    import requests
    import time
    import random
    import os
    
    my_headers = [
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0"
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)"
    
        ]
    
    
    headers = {
        'User-Agent':random.choice(my_headers)
    }
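    # Note: random.choice is evaluated once here, so the same User-Agent is reused
    # for every request in a run; rebuilding the headers dict per request would
    # rotate agents (an optional tweak, not part of the original script).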
    
    
    def Downloads(url, abc, title1):
        # Fetch the image bytes and write them to 壁纸/<set title>/<index>.jpg.
        data = requests.get(url, headers=headers)
        time.sleep(1)
        path = os.path.join('壁纸', title1, str(abc) + '.jpg')
        with open(path, 'wb') as f:
            f.write(data.content)
    
    
    def get_image(url,abc,title1):
        #url = 'http://desk.zol.com.cn/showpic/1366x768_89338_102.html'
        data = requests.get(url, headers=headers)
        soup = BeautifulSoup(data.text, 'lxml')
        url = soup.select('img')[0].get('src')  # the first <img> on the showpic page is the full-size wallpaper
        print('Downloading {} image {}'.format(title1, abc))
        Downloads(url,abc,title1)
    
    
    def get_image_url(url,abc):
        #url = 'http://desk.zol.com.cn/bizhi/7254_89744_2.html'
        data = requests.get(url,headers=headers)
        soup = BeautifulSoup(data.text,'lxml')
        time.sleep(2)
        # a CSS id selector may not start with a digit, so match the 1366x768 link by attribute
        url = 'http://desk.zol.com.cn' + soup.select('dd#tagfbl > a[id="1366x768"]')[0].get('href')
        title1 = soup.select('a#titleName')[0].text
        folder = os.path.join('壁纸', title1)
        if not os.path.exists(folder):
            os.makedirs(folder)  # also creates the top-level 壁纸 directory on the first run
        get_image(url,abc,title1)
    
    
    def get_one_urls(url):
        #url = 'http://desk.zol.com.cn/bizhi/7211_89338_2.html'
        data = requests.get(url,headers=headers)
        soup = BeautifulSoup(data.text,'lxml')
        urlss = soup.select('div.photo-list-box > ul.clearfix')
        title1 = soup.select('a#titleName')[0].text
        print('Downloading wallpaper set {}'.format(title1))
        abc = 0
        for urls in urlss:
            urls = urls.select('li > a')
            for url in urls:
                try:
                    url = 'http://desk.zol.com.cn' + url.get('href')
                    time.sleep(1)
                    abc += 1
                    get_image_url(url,abc)
                except Exception:
                    continue
    
    
    
    def get_urls(url):
        #url = 'http://desk.zol.com.cn/fengjing/1366x768/'
        data = requests.get(url,headers=headers)
        # the listing page is GB-encoded while requests may decode it as ISO-8859-1,
        # so re-encode/decode to avoid mojibake in the parsed titles
        soup = BeautifulSoup(data.text.encode('ISO-8859-1').decode('GB18030'), 'lxml')
        urls = soup.select('li.photo-list-padding > a')
        #titles = soup.select('li.photo-list-padding > a.pic > span > em')
        #urls = soup.select('body > div.wrapper.top-main.clearfix > div.main > ul > li > a')[0].get('href')
        for url in urls:
            try:
                time.sleep(1)
                url = 'http://desk.zol.com.cn' + url.get('href')
                get_one_urls(url)
            except Exception:
                continue
    
    
    def urls():
        for i in range(10):
            try:
                url = 'http://desk.zol.com.cn/fengjing/1366x768/' + str(i) + '.html'
                time.sleep(1)
                get_urls(url)
            except Exception:
                continue
    
    
    if __name__ == '__main__':
        urls()
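
    The script also does no error handling on the HTTP responses themselves: a throttled or failed request would be written to disk just like a real image. Below is a minimal sketch of a more defensive fetch helper; the name fetch and the retry/timeout values are illustrative choices, not part of the original post.

    import time
    import requests

    def fetch(url, headers, retries=3, timeout=10):
        """Return the response on HTTP 200, or None after all attempts fail."""
        for attempt in range(retries):
            try:
                resp = requests.get(url, headers=headers, timeout=timeout)
                if resp.status_code == 200:
                    return resp
            except requests.RequestException:
                pass  # network error; fall through to the back-off below
            time.sleep(2 ** attempt)  # wait 1s, then 2s, then 4s between attempts
        return None

    Downloads and the get_* functions could call fetch(...) in place of requests.get(...) and simply skip an item when it returns None.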
  • Original post: https://www.cnblogs.com/ssxsy/p/9055818.html