Traceback (most recent call last):
File "androidmarket82.py", line 108, in <module>
main()
File "androidmarket82.py", line 54, in main
pattern=re.compile('<label id="ctl00_AndroidMaster_Content_Apk_SoftVersionName">(.+?)</label>)</div>')#版本号
File "/usr/lib/python2.7/re.py", line 190, in compile
return _compile(pattern, flags)
File "/usr/lib/python2.7/re.py", line 242, in _compile
raise error, v # invalid expression
sre_constants.error: unbalanced parenthesis
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape app metadata and icons from apk.hiapk.com into a MySQL table.

The script collects detail-page links from the listing pages, extracts the
name, version, developer, publish time, file size, supported firmware,
category and description of every app with regular expressions, inserts one
row per app into the ``androidmarket`` table and saves each app icon under
the ``androidmarket/`` directory.

Written for Python 2.7 (page bodies are handled as byte strings); the syntax
is kept valid on Python 3 as well.
"""
import os
import re
import sys
import time

import MySQLdb
import requests

# Running counter used to name the saved icon files (androidmarket/0, 1, ...).
num = 0
# De-duplicated detail-page links collected from the listing pages.
dataresult = []

# NOTE(review): everything after '#' is a URL fragment, which is not sent to
# the server, so all listing requests probably return the same page. The real
# paging URL could not be determined from this file -- confirm and adjust.
LIST_URL_TEMPLATE = "http://apk.hiapk.com/apps#%d_1_0_0_0_0_0"
LIST_PAGE_COUNT = 51
RETRY_DELAY_SECONDS = 30      # pause after a failed request before moving on
REQUEST_TIMEOUT_SECONDS = 30  # requests.get() never times out by default
COUNT_FILE = "androidmarket.txt"
IMAGE_DIR = "androidmarket"

_DETAIL_LINK_RE = re.compile(
    r'''<a target='_blank' title=".+?" href="(.+?)">.+?</a></span>''')

# (column label, pattern) pairs, in the column order of the INSERT statement.
_FIELD_PATTERNS = (
    # app name
    ("name", re.compile(
        r'<label id="ctl00_AndroidMaster_Content_Apk_SoftName">'
        r'(.+?)</label>')),
    # version; the ')' after </label> is literal page text and must be
    # escaped -- unescaped it raised "unbalanced parenthesis" in re.compile
    ("version", re.compile(
        r'<label id="ctl00_AndroidMaster_Content_Apk_SoftVersionName">'
        r'(.+?)</label>\)</div>')),
    # developer
    ("developer", re.compile(
        r'<span class="d_u_line">'
        r'<label id="ctl00_AndroidMaster_Content_Apk_SoftDeveloper">'
        r'(.+?)</label>')),
    # publish time
    ("pubtime", re.compile(
        r'<label id="ctl00_AndroidMaster_Content_Apk_SoftPublishTime">'
        r'(.+?)</label>')),
    # file size
    ("filesize", re.compile(
        r'<span class="font14">'
        r'<label id="ctl00_AndroidMaster_Content_Apk_SoftSize">'
        r'(.+?)</label>')),
    # supported firmware
    ("support", re.compile(
        r'<span class="font14 d_gj_line">'
        r'<label id="ctl00_AndroidMaster_Content_Apk_SoftSuitSdk">'
        r'(.+?)</label>')),
    # category
    ("classify", re.compile(
        r'<span id="ctl00_AndroidMaster_Content_Apk_SoftCategory" '
        r'class="font14">(.+?)</span></a></span></div>')),
)

# Description; [\s\S] (not [sS]) so the match can span line breaks.
_DESCRIPTION_RE = re.compile(
    r'<label id="ctl00_AndroidMaster_Content_Apk_Description">'
    r'([\s\S]*?)</label>')

# Icon <img> inside the title block; captures the raw text between
# "src=" and "/>" (quotes and any trailing space included).
_ICON_RE = re.compile(
    r' <div class="appTitle clearfix">[\s\S]*?<img src=(.+?)/>')

# Eight columns for eight placeholders. The original statement fused the last
# two names into "classifyintroduction", so every insert failed.
# NOTE(review): assumes the columns are named "classify" and "introduction".
_INSERT_SQL = (
    "insert into androidmarket"
    "(name,version,developer,pubtime,filesize,support,classify,introduction) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s)"
)


def _fetch(url):
    """Return the raw body of *url*, or None (after a pause) on failure."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        print("request failed for %s: %s" % (url, exc))
        time.sleep(RETRY_DELAY_SECONDS)
        return None
    return response.content


def _collect_detail_links(known_links):
    """Return *known_links* plus every detail link found on the listing pages."""
    links = set(known_links)
    for page_index in range(LIST_PAGE_COUNT):
        url = LIST_URL_TEMPLATE % page_index
        print(url)
        page = _fetch(url)
        if page is None:
            continue
        links.update(_DETAIL_LINK_RE.findall(page))
        print(len(links))
    return list(links)


def _extract_fields(page):
    """Return the seven scalar fields of a detail page, or None if one is missing."""
    fields = []
    for label, pattern in _FIELD_PATTERNS:
        match = pattern.search(page)
        if match is None:
            print("field %r not found, skipping page" % label)
            return None
        print(match.group(1))
        fields.append(match.group(1))
    return fields


def _icon_url(raw_src):
    """Return the bare URL from the raw ``src=`` capture, or '' if empty."""
    tokens = raw_src.split()
    if not tokens:
        return ""
    # First token is the attribute value; drop its surrounding quotes.
    return tokens[0].strip("\"'")


def _store_app(conn, cursor, fields, description):
    """Insert one app row; report and roll back on a database error."""
    values = tuple(fields) + (description.replace('<br />', ' '),)
    try:
        cursor.execute(_INSERT_SQL, values)
        conn.commit()
    except MySQLdb.Error as exc:
        print("insert failed for %s: %s" % (fields[0], exc))
        conn.rollback()


def _save_icons(page):
    """Download every icon referenced by *page* into IMAGE_DIR."""
    global num
    for raw_src in _ICON_RE.findall(page):
        print(raw_src)
        url = _icon_url(raw_src)
        if not url:
            continue
        image = _fetch(url)
        if image is None:
            continue
        # Binary mode: the payload is image data, not text.
        with open(os.path.join(IMAGE_DIR, str(num)), "wb") as icon_file:
            icon_file.write(image)
        num += 1
        print(num)


def main():
    """Run the scrape: collect links, store app rows, download icons.

    Exits with a non-zero status if the MySQL connection cannot be opened.
    Pages that fail to download or lack an expected field are skipped.
    """
    global dataresult

    try:
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration or the environment.
        conn = MySQLdb.connect(host='localhost', user='root', passwd='123456',
                               db='addressbookdb', charset="utf8")
        conn.query("set names utf8")
    except MySQLdb.Error as exc:
        sys.exit("cannot connect to MySQL: %s" % exc)

    cursor = conn.cursor()
    try:
        dataresult = _collect_detail_links(dataresult)

        with open(COUNT_FILE, "a") as count_file:
            count_file.write(str(len(dataresult)))
        print(len(dataresult))

        if not os.path.isdir(IMAGE_DIR):
            os.makedirs(IMAGE_DIR)

        for detail_url in dataresult:
            print(detail_url)
            page = _fetch(detail_url)
            if page is None:
                continue

            fields = _extract_fields(page)
            if fields is not None:
                for description in _DESCRIPTION_RE.findall(page):
                    _store_app(conn, cursor, fields, description)

            _save_icons(page)
    finally:
        # Release the database handles even if the scrape aborts midway.
        cursor.close()
        conn.close()


if __name__ == "__main__":
    main()