zoukankan      html  css  js  c++  java
  • 爬取动物图片源码

    爬取动物图片源码

    import os
    import shutil
    import time
    from datetime import datetime
    from threading import Lock, Thread

    import requests
    from bs4 import BeautifulSoup
    
    def fun_makedir():
        """
        Create a fresh, timestamped download directory and switch into it.

        The directory is ``<cwd>/down/<YYYYmmddHHMMSS>``. If a directory with
        that name already exists it is removed first, so the function always
        ends inside an empty directory. The process working directory is
        changed as a side effect.
        """
        stamp = time.strftime('%Y%m%d%H%M%S', time.localtime())
        target = os.getcwd() + '/down/' + stamp
        # Wipe any leftover directory with the same timestamp before recreating it.
        if os.path.exists(target):
            shutil.rmtree(target)
        os.makedirs(target)
        os.chdir(target)
    
    
    def getmsg(url):
        """
        Collect the name and detail-page link of every picture on a listing page.

        :param url: URL of a listing page.
        :return: list of ``[picture_name, detail_page_url]`` pairs; empty when
                 the page contains no ``div`` with ``id='container'``.
        :raises requests.RequestException: if the page cannot be fetched
                 (including when the timeout expires).
        """
        # A timeout keeps a stalled server from blocking the crawl forever.
        response = requests.get(url, timeout=30)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')

        container = soup.find('div', id='container')
        if container is None:
            # Unexpected layout or an error page: there is nothing to collect.
            return []

        pictrues = []
        # Filtering on class_ matters: without it the search returned 80 divs
        # instead of the expected 40.
        for block in container.find_all('div', class_='box picblock col3'):
            anchor = block.find('a')
            if anchor is None:
                continue
            pictrue_name = anchor.get('alt')
            pictrue_url = anchor.get('href')
            # Skip entries that lack either attribute instead of aborting the page.
            if pictrue_name is None or pictrue_url is None:
                continue
            pictrues.append([pictrue_name, pictrue_url])
        return pictrues
    
    
    # Serialises updates to the global picture counter across download threads.
    _count_lock = Lock()


    def save_pic(pic_name, pic_url):
        """
        Download one picture and store it in the current working directory.

        The file is named ``"<n> <pic_name>.jpg"``, where ``n`` is the value of
        the module-level ``count`` after it has been incremented for this
        picture. ``count`` must already be initialised by the caller.

        :param pic_name: picture title used in the file name.
        :param pic_url: direct URL of the image file.
        :return: None
        :raises requests.RequestException: if the download fails, times out or
                 the server answers with an HTTP error status.
        """
        global count
        pic = requests.get(pic_url, timeout=30)
        # Do not store an HTTP error page under a .jpg name.
        pic.raise_for_status()
        # Increment only after a successful download, and under a lock, so that
        # concurrent threads never obtain the same number (and thus the same
        # file name) and the final total reflects pictures actually fetched.
        with _count_lock:
            count = count + 1
            index = count
        filename = str(index) + ' ' + pic_name + '.jpg'
        # 'wb' instead of 'ab': never append image bytes to an existing file.
        with open(filename, 'wb') as f:
            # The raw bytes live in .content; the response object itself
            # cannot be written to a file.
            f.write(pic.content)
        print("图片:{}下载成功".format(filename))
    
    def down_pictrue(pictrue_name, pictrue_url):
        """
        Resolve the image link from a picture's detail page and download it.

        :param pictrue_name: picture title, passed on to ``save_pic`` and used
                             in the failure message.
        :param pictrue_url: URL of the picture's detail page.
        :return: None; failures are reported on stdout instead of being raised.
        """
        try:
            # A timeout keeps one stalled request from hanging its thread forever.
            down_res = requests.get(pictrue_url, timeout=30)
            down_res.encoding = 'utf-8'
            down_soup = BeautifulSoup(down_res.text, 'html.parser')
            # The download link is the first anchor inside the 'imga' div.
            down_link = down_soup.find('div', class_='imga').find('a')['href']

            save_pic(pictrue_name, down_link)
        except Exception:
            # Best effort: one broken picture must not stop the others.
            # Catching Exception rather than using a bare except lets
            # SystemExit and KeyboardInterrupt propagate.
            print("{}未获取到链接".format(pictrue_name))
    
    # Main routine
    def main():
        """
        Crawl listing pages 1-10 and download every picture found on them.

        For each listing page one thread per picture is started and all of them
        are joined before the next page is processed. The module-level ``count``
        is reset to 0 here, and a summary with the picture count and the
        elapsed time is printed at the end.
        """
        start_time = datetime.now()
        global count  # running number of pictures, updated by save_pic
        count = 0

        first_page = "http://sc.chinaz.com/tupian/dongwutupian.html"
        for i in range(1, 11):
            # The first page has no numeric suffix; later pages are "_<n>".
            if i == 1:
                url = first_page
            else:
                url = "http://sc.chinaz.com/tupian/dongwutupian_{}.html".format(i)
            print("collecting message from {}".format(url))

            pictrues = getmsg(url)
            # One thread per picture; each runs down_pictrue(name, page_url).
            threads = [
                Thread(target=down_pictrue, args=(item[0], item[1]))
                for item in pictrues
            ]
            for t in threads:
                t.start()
            for t in threads:
                t.join()

        run_time = (datetime.now() - start_time).total_seconds()
        # The newline is written as an escape so the literal stays on one line.
        # The former end=... keyword was passed to str.format(), where it had
        # no effect, so it is dropped.
        print("\n一共下载{}张图片,共用时{}秒".format(count, run_time))
    
    # Script entry point
    if __name__ == "__main__":
        # Prepare the output directory and make it the working directory,
        # so that downloaded pictures are saved there.
        fun_makedir()
        # Run the crawl.
        main()
    
    
  • 相关阅读:
    CodeForces 681D Gifts by the List (树上DFS)
    UVa 12342 Tax Calculator (水题,纳税)
    CodeForces 681C Heap Operations (模拟题,优先队列)
    CodeForces 682C Alyona and the Tree (树上DFS)
    CodeForces 682B Alyona and Mex (题意水题)
    CodeForces 682A Alyona and Numbers (水题,数学)
    Virtualizing memory type
    页面跳转
    PHP Misc. 函数
    PHP 5 Math 函数
  • 原文地址:https://www.cnblogs.com/yuexiao/p/12788165.html
Copyright © 2011-2022 走看看