"""Download site logos for every uid listed in a text file.

For each uid read from ``uid_list_file`` the script checks whether a ``.jpg``
whose file name contains that uid already exists in ``img_dir``; if not, the
logo is fetched from ``logo_url_template`` and stored as ``<uid>logo.jpg``.
"""
import glob
import os
import threading
import time

import requests

os_sep = os.sep
# Backslashes are escaped explicitly: the previous literal ended in \' which
# escaped the closing quote and left the string unterminated (SyntaxError).
img_dir = 'D:\\mylogo\\'
uid_list_file = 'dbuid.hadlogo.txt'
# The uid is formatted into the URL; the previous code tried to substitute it
# with str.replace on a number that did not occur in the URL, so every request
# fetched the same logo.
logo_url_template = 'http://img.a.r.com/site/{}/logo.jpg'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, uid, uid_n,
                                           local_default='default.DONOT_REMOVE.png',
                                           timeout=30):
    """Download ``img_url`` into ``img_dir`` and return the local file path.

    The image is saved as ``<uid><uid_n>.jpg`` inside ``img_dir``.  Any query
    string on ``img_url`` is dropped before the request is made.

    Args:
        img_dir: Directory the image is written to.
        img_url: URL of the image; a value without a ``.`` is treated as
            "no image" and is not requested.
        uid: First part of the saved file name.
        uid_n: Second part of the saved file name (e.g. ``'logo'``).
        local_default: File name, relative to ``img_dir``, returned when no
            image could be saved.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The path of the saved image, or the path of ``local_default`` when
        the URL is unusable, the response is not a non-empty HTTP 200, or
        the download/write failed.
    """
    local_path = os.path.join(img_dir, local_default)
    if '.' not in img_url:
        return local_path

    img_url = img_url.split('?')[0]
    try:
        # One request only: the body and the status come from the same response.
        response = requests.get(img_url, timeout=timeout)
        content = response.content
        if response.status_code == 200 and content:
            target = os.path.join(img_dir, '{}{}.jpg'.format(uid, uid_n))
            with open(target, 'wb') as fw:
                fw.write(content)
            # Only report the new path once the file has really been written.
            local_path = target
    except Exception as e:
        # Deliberately broad: this is a best-effort batch download, one bad
        # image must not abort the whole run.  Back off before the next one.
        print(e)
        time.sleep(10)
    return local_path


def _logo_already_downloaded(img_dir, uid):
    """Return True if a .jpg in ``img_dir`` has ``uid`` in its file name."""
    pattern = os.path.join(img_dir, '*.jpg')
    # Match on the file name only so the directory part can never match a uid.
    return any(uid in os.path.basename(path) for path in glob.glob(pattern))


def main():
    """Fetch the logo of every uid in ``uid_list_file`` that has none yet."""
    with open(uid_list_file, 'r', encoding='utf-8') as fr:
        for line in fr:
            # Remove every whitespace character, including the trailing newline.
            uid = ''.join(line.split())
            if not uid:
                # A blank line would match every file name ('' in s is True).
                continue
            if _logo_already_downloaded(img_dir, uid):
                print(uid)
                continue
            logo_url = logo_url_template.format(uid)
            spider_webimg_dl_return_local_img_path(img_dir, logo_url, uid, 'logo')
            # Small pause between downloads to avoid hammering the server.
            time.sleep(0.5)


if __name__ == '__main__':
    main()