# Reference: http://www.cnblogs.com/fnng/p/3576154.html
"""Scrape the Shuangseqiu (SSQ) trend chart and list the drawn balls.

The trend page marks every drawn number with a ``<td>`` whose class list
contains ``red_ball`` or ``blue_ball``.  Cells are read in page order; each
blue ball closes one draw, so a draw is reported as
``{'blue': 'NN', 'red': ['NN', ...]}``.

The original note expected 30 draws x (6 red + 1 blue) = 210 matching cells.
"""
import re
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2, which the original snippet targeted
    from urllib import urlopen

TREND_URL = 'http://trend.baidu.lecai.com/ssq/'

# One "hit" cell of the chart.  ``[^']+`` keeps each match inside a single
# class attribute, so several cells sharing one line are still matched one by
# one, and ``\d{2}`` captures the two-digit ball number.
BALL_CELL_RE = re.compile(
    r"<td class='chart_table_td omission_entry omission_hit "
    r"[^']+_ball background_color_[^']+'>(?P<number>\d{2})</td>"
)


def fetch_html(url=TREND_URL):
    """Download *url* and return its body as text.

    The response is always closed, even if reading fails.
    """
    with closing(urlopen(url)) as page:
        html = page.read()
    if not isinstance(html, str):
        # Python 3 returns bytes.  Only the ASCII markup is inspected later,
        # so undecodable bytes elsewhere on the page are simply replaced.
        html = html.decode('utf-8', 'replace')
    return html


def parse_draws(html):
    """Return the draws found in *html*, in page order.

    Args:
        html: Text of the trend page.

    Returns:
        A list of ``{'blue': str, 'red': [str, ...]}`` dicts.  Red balls are
        collected until a blue ball is seen; the blue ball completes the draw.
        Red balls that are not followed by a blue ball are dropped.
    """
    draws = []
    reds = []
    for match in BALL_CELL_RE.finditer(html):
        cell = match.group(0)
        number = match.group('number')
        if 'red_ball' in cell:
            reds.append(number)
        elif 'blue_ball' in cell:
            draws.append({'blue': number, 'red': reds})
            reds = []
    return draws


def main():
    """Fetch the trend page and print the parsed draws."""
    last30 = parse_draws(fetch_html())
    print(last30)


if __name__ == '__main__':
    main()