zoukankan      html  css  js  c++  java
  • 多进程下载 断点下载

    import glob
    import os
    import requests, time, threading
    os_sep = os.sep
    # Target directory for downloaded images. The backslashes are escaped
    # because a lone trailing backslash would escape the closing quote and
    # leave the literal unterminated. The trailing separator is kept because
    # the download helper concatenates file names directly onto img_dir.
    img_dir = 'D:\\mylogo\\'
    def spider_webimg_dl_return_local_img_path(img_dir, img_url, uid, uid_n, local_default='default.DONOT_REMOVE.png'):
        """Download an image and return the local path it was saved to.

        The query string (everything from the first '?') is stripped from
        ``img_url`` before the request is made. On success the response body
        is written to ``img_dir + uid + uid_n + '.jpg'``.

        Args:
            img_dir: Directory prefix; concatenated directly with the file
                name, so it must already end with a path separator.
            img_url: URL of the image to fetch.
            uid: First part of the saved file name.
            uid_n: Second part of the saved file name (e.g. ``'logo'``).
            local_default: File name, relative to ``img_dir``, returned when
                nothing was downloaded.

        Returns:
            The path of the saved image, or ``img_dir + local_default`` when
            the URL contains no '.', the response is not HTTP 200, the body
            is empty, or the request/write fails.
        """
        fallback = '%s%s' % (img_dir, local_default)
        if '.' not in img_url:
            return fallback
        img_url = img_url.split('?')[0]
        try:
            # One request only: status and body come from the same response.
            # The timeout keeps a stalled server from hanging the whole run.
            response = requests.get(img_url, timeout=30)
            payload = response.content
            if response.status_code != 200 or not payload:
                return fallback
            local_path = '%s%s%s%s' % (img_dir, uid, uid_n, '.jpg')
            with open(local_path, 'wb') as f:
                f.write(payload)
            # Only report the image path once the file has been written.
            return local_path
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure, back off briefly, and fall
            # through to the default image.
            print(e)
            time.sleep(10)
        return fallback
    
    
    # Resume-style driver: read one uid per line and download the logo for
    # every uid that does not already have a saved image in img_dir.
    f = 'dbuid.hadlogo.txt'
    with open(f, 'r', encoding='utf-8') as fr:
        for i in fr:
            # Strip tabs and the line terminator (written as escape sequences;
            # raw control characters inside the literal do not parse).
            uid = i.replace('\t', '').replace('\n', '')
            if not uid:
                # Blank line: nothing to download.
                continue
            # Same name the download helper writes, so the check is exact:
            # uid '12' is no longer treated as done because '123logo.jpg' exists.
            # NOTE(review): images saved under any other naming scheme are not
            # recognised by this check — confirm none exist in img_dir.
            local_logo = '%s%s%s%s' % (img_dir, uid, 'logo', '.jpg')
            if os.path.isfile(local_logo):
                print(uid)
                continue
            # Build the per-uid URL directly; the previous replace() looked for
            # '34475', which never occurred in the template URL ('34075').
            logo_url = 'http://img.a.r.com/site/{}/logo.jpg'.format(uid)
            spider_webimg_dl_return_local_img_path(img_dir, logo_url, uid, 'logo')
            # Throttle requests to the image host.
            time.sleep(0.5)
  • 相关阅读:
    Linear Predictors
    Non-Programmer's Tutorial for Python 3/File IO
    Python File I/O
    Introduction to Machine Learning
    Python3.6爬虫+Django2.0+Mysql --数据爬取
    MySql5.7 找回密码
    pyinstaller 打包python3.6文件成exe 运行
    python 连接mssql数据库
    Nopi 导出设置行高
    python登录aspx网站
  • 原文地址:https://www.cnblogs.com/rsapaper/p/8888904.html
Copyright © 2011-2022 走看看