代码是同事写的,我把它单独抠出来,可以作为工具函数使用。当然,性能还是个问题,有待解决。
import random import cookielib import urllib import urllib2 import HTMLParser import re cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar()) opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler) urllib2.install_opener(opener) user_agents = [ 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', 'Opera/9.25 (Windows NT 5.1; U; en)', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12', 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9', "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7", "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ", ] agent = random.choice(user_agents) opener.addheaders = [("User-agent", agent), ("Accept", "*/*"), ('Referer', 'http://www.google.com')] def unescape(text): parser = HTMLParser.HTMLParser() return (parser.unescape(text)) def TranslateByGoogle(text="", fromLang="en", toLang="zh-CN"): base_link = "https://translate.google.cn/m?hl=%s&sl=%s&q=%s" text = urllib.quote_plus(text.encode('utf8')) link = base_link % (toLang, fromLang, text) try: raw_data = urllib2.urlopen(link).read() data = raw_data.decode("utf-8") expr = r'class="t0">(.*?)<' re_result = re.findall(expr, data) if (len(re_result) == 0): result = "" else: result = unescape(re_result[0]) return (result) except Exception, e: print e