#!/usr/bin/env python
#encoding=utf-8
"""
2010.10.19
v0.1
Fetch images (batch image downloader).

Reads image URLs (one per line) from bigimages.txt and downloads each
into a local directory tree mirroring the URL path, using a small
eventlet green-thread pool. export_img2txt() regenerates the URL list
from the database.
"""
import socket as original_socket
# Global default timeout (seconds) for all socket operations; this also
# bounds the patched urllib downloads below.
original_socket.setdefaulttimeout(10)
import sys
# Python 2 hack: reload(sys) restores setdefaultencoding() (removed from
# sys by site.py) so the default encoding can be forced to match stdout.
reload(sys)
sys.setdefaultencoding(sys.stdout.encoding)
import eventlet
from eventlet import pools
from eventlet.timeout import with_timeout
#from eventlet.green import urllib2
# Green (cooperatively non-blocking) urllib so concurrent downloads in
# the GreenPool don't block the eventlet hub.
urllib = eventlet.import_patched('urllib')
from sqlalchemy.ext.sqlsoup import SqlSoup
from sqlalchemy import create_engine,MetaData
import time
#import urllib
import urlparse
import os
import os.path
from functools import wraps
# Append-only log files recording which URLs failed / succeeded.
g_error_file = os.path.join("./","img_error.txt")
g_success_file = os.path.join("./","img_success.txt")
def error(url):
    """Append a failed download URL to the error log file.

    Args:
        url: the image URL (string) whose download failed.
    """
    # 'with' guarantees the handle is closed even if the write raises
    # (the original leaked the descriptor on a failed write).
    with open(g_error_file, "a") as f:
        f.write("%s\n" % (url,))
def success(url):
    """Append a successfully downloaded URL to the success log file.

    Args:
        url: the image URL (string) that was downloaded.
    """
    # 'with' guarantees the handle is closed even if the write raises
    # (the original leaked the descriptor on a failed write).
    with open(g_success_file, "a") as f:
        f.write("%s\n" % url)
def cost_time(f):
    """Decorator: print start/end wall-clock time and total duration of f.

    The wrapped function's return value is passed through unchanged;
    functools.wraps preserves f's name for the log lines.
    """
    @wraps(f)
    def timed(*args, **kwargs):
        started = time.time()
        print("@%s, {%s} start" % (time.strftime("%X", time.localtime()), f.__name__))
        result = f(*args, **kwargs)
        print("@%s, {%s} end" % (time.strftime("%X", time.localtime()), f.__name__))
        print("@%.3fs taken for {%s}" % (time.time() - started, f.__name__))
        return result
    return timed
def export_img2txt():
    """Dump product image URLs for one site into bigimages.txt.

    Connects to the local MySQL `search` database and writes one
    `bigimage` URL per line; the file is later consumed by batch_imgs().
    The database must already exist before this is run.
    """
    # NOTE(review): credentials are hard-coded here; move them to
    # config/environment before wider use.
    db = create_engine("mysql://root:zhoubt@localhost:3306/search?charset=utf8")
    soup = SqlSoup(MetaData(db))
    rp = soup.bind.execute(u"select bigimage from model_product where site='万客商城'")
    # 'with' closes the output file even if a row fails to write
    # (the original leaked the handle on error).
    with open("bigimages.txt", "w") as f:
        for row in rp.fetchall():
            f.write("%s\n" % row[0])
def down_img(url):
    """Download one image, mirroring the URL path under the current dir.

    e.g. http://host/a/b/c.jpg is saved to ./a/b/c.jpg; intermediate
    directories are created as needed.

    Args:
        url: absolute image URL.
    Returns:
        True on completion (used as the success flag by do_down_img's
        with_timeout wrapper, whose timeout_value is False).
    """
    print(url)
    path = urlparse.urlparse(url).path
    print(path)
    dirs, filename = os.path.split(path)
    if not os.path.exists("." + dirs):
        print("==========> %s" % dirs)
        try:
            os.makedirs("." + dirs)
        except OSError:
            # Another green thread may create the directory between the
            # exists() check and makedirs() (TOCTOU race with GreenPool(5));
            # swallow the error only if the directory now exists.
            if not os.path.isdir("." + dirs):
                raise
    urllib.urlretrieve(url, filename="." + path)
    return True
def do_down_img(url):
    """Download one URL with a 12-second cap and record the outcome.

    Runs down_img() under an eventlet timeout (timeout yields False);
    the URL is then appended to the success or error log accordingly.
    """
    ok = with_timeout(12, down_img, url, timeout_value=False)
    if not ok:
        print("error %s" % url)
        error(url)
    else:
        print("success %s" % url)
        success(url)
@cost_time
def batch_imgs():
    """Download every URL listed in ./bigimages.txt using 5 green threads.

    Skips blank lines; waits for all downloads to finish before returning.
    """
    pool = eventlet.GreenPool(5)
    # Iterate the file lazily and close it deterministically (the original
    # never closed the handle and slurped everything via readlines()).
    with open("./bigimages.txt", "r") as f:
        for line in f:
            # Strip the trailing newline BEFORE handing the URL to the
            # worker: the original passed the raw line, so urlparse /
            # urlretrieve saw a '\n' embedded in the path.
            url = line.strip()
            if url != "":  # '<>' replaced: removed in Python 3, '!=' works in both
                print(url)
                pool.spawn_n(do_down_img, url)
    pool.waitall()
if __name__ == "__main__":
    # Entry point: download every image listed in bigimages.txt.
    # (Run export_img2txt() first if the URL list needs regenerating.)
    batch_imgs()