zoukankan      html  css  js  c++  java
  • sre_constants.error: unbalanced parenthesis

    Traceback (most recent call last):
      File "androidmarket82.py", line 108, in <module>
        main()
      File "androidmarket82.py", line 54, in main
        pattern=re.compile('<label id="ctl00_AndroidMaster_Content_Apk_SoftVersionName">(.+?)</label>)</div>')#版本号
      File "/usr/lib/python2.7/re.py", line 190, in compile
        return _compile(pattern, flags)
      File "/usr/lib/python2.7/re.py", line 242, in _compile
        raise error, v # invalid expression
    sre_constants.error: unbalanced parenthesis



    #!/usr/bin/env python
    #-*- coding: utf-8  -*-
    import requests
    import os,sys 
    import time
    import MySQLdb
    import re
    num=0
    dataresult=[]
    # Seconds to wait for an HTTP response before treating the request as failed.
    _REQUEST_TIMEOUT = 30
    # Seconds to pause after a failed request before moving on.
    _RETRY_PAUSE = 30
    # Directory that receives one downloaded icon file per application.
    _ICON_DIR = "androidmarket"

    # Link to an application's detail page on a listing page.
    _LISTING_LINK_RE = re.compile(
        '''<a target='_blank' title=".+?" href="(.+?)">.+?</a></span>''')

    # Scalar fields of a detail page, in the column order used by the INSERT:
    # name, version, developer, publish time, file size, supported firmware,
    # category.
    _FIELD_PATTERNS = (
        re.compile('<label id="ctl00_AndroidMaster_Content_Apk_SoftName">(.+?)</label>'),
        # The original pattern carried a stray ")" after </label>, which made
        # re.compile raise "unbalanced parenthesis".
        re.compile('<label id="ctl00_AndroidMaster_Content_Apk_SoftVersionName">(.+?)</label></div>'),
        re.compile('<span class="d_u_line"><label id="ctl00_AndroidMaster_Content_Apk_SoftDeveloper">(.+?)</label>'),
        re.compile('<label id="ctl00_AndroidMaster_Content_Apk_SoftPublishTime">(.+?)</label>'),
        re.compile('<span class="font14"><label id="ctl00_AndroidMaster_Content_Apk_SoftSize">(.+?)</label>'),
        re.compile('<span class="font14 d_gj_line"><label id="ctl00_AndroidMaster_Content_Apk_SoftSuitSdk">(.+?)</label>'),
        re.compile('<span id="ctl00_AndroidMaster_Content_Apk_SoftCategory" class="font14">(.+?)</span></a></span></div>'),
    )

    # Description text; [\s\S] is used so the match can span line breaks.
    _DESCRIPTION_RE = re.compile(
        r'<label id="ctl00_AndroidMaster_Content_Apk_Description">([\s\S]*?)</label>')

    # Raw src attribute of the icon image inside the title block.
    _ICON_RE = re.compile(r' <div class="appTitle clearfix">[\s\S]*?<img src=(.+?)/>')

    _INSERT_SQL = ("insert into androidmarket"
                   "(name,version,developer,pubtime,filesize,support,classify,introduction) "
                   "values(%s,%s,%s,%s,%s,%s,%s,%s)")


    def _fetch(url):
        """Return the response body for *url*, or None if the request failed.

        On failure the error is printed and the function sleeps for
        _RETRY_PAUSE seconds before returning, so callers can simply skip
        the item without hammering the server.
        """
        try:
            return requests.get(url, timeout=_REQUEST_TIMEOUT).content
        except requests.RequestException as e:
            print(e)
            time.sleep(_RETRY_PAUSE)
            return None


    def _parse_fields(page):
        """Return the seven scalar fields of a detail page as a list.

        Returns None when any field pattern does not match, so the caller
        can skip the page instead of failing on a missing index.
        """
        fields = []
        for pattern in _FIELD_PATTERNS:
            found = pattern.search(page)
            if found is None:
                return None
            fields.append(found.group(1))
        return fields


    def _store_app(conn, cursor, fields, page):
        """Insert one row per description block found in *page*.

        A failed insert is printed and rolled back; it does not stop the run.
        """
        for description in _DESCRIPTION_RE.findall(page):
            values = tuple(fields) + (description.replace('<br />', ' '),)
            try:
                cursor.execute(_INSERT_SQL, values)
                conn.commit()
            except MySQLdb.Error as e:
                print(e)
                conn.rollback()


    def _fetch_icon(page):
        """Return the bytes of the last icon in *page* that could be downloaded.

        Returns None when the page has no icon match or every download failed.
        """
        icon = None
        for src in _ICON_RE.findall(page):
            print(src)
            # The capture is the raw attribute text; the slice drops its first
            # character and last two characters (presumably the opening quote
            # and the closing quote plus a space -- confirm against the markup).
            body = _fetch(src[1:-2])
            if body is not None:
                icon = body
        return icon


    def main():
        """Scrape application listings and store their details and icons.

        Steps:
          1. Connect to the local MySQL database; exit if that fails.
          2. Collect detail-page links from 51 listing URLs into the
             module-level ``dataresult`` list (de-duplicated).
          3. Append the number of links to ``androidmarket.txt``.
          4. For every link, parse the detail fields, insert them into the
             ``androidmarket`` table and save the icon to
             ``androidmarket/<num>``, where ``num`` is the module-level
             counter.

        Pages that cannot be fetched or that lack an expected field are
        skipped rather than aborting the whole run.
        """
        global dataresult, num

        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration.
        try:
            conn = MySQLdb.connect(host='localhost', user='root', passwd='123456',
                                   db='addressbookdb', charset="utf8")
            conn.query("set names utf8")
        except Exception as e:
            print(e)
            sys.exit()
        cursor = conn.cursor()

        try:
            for k in range(51):
                # NOTE(review): everything after '#' is a URL fragment, which
                # HTTP clients do not send to the server, so these requests may
                # all return the same page -- confirm the real paging URL.
                url = "http://apk.hiapk.com/apps#" + str(k) + "_1_0_0_0_0_0"
                print(url)
                page = _fetch(url)
                if page is None:
                    continue
                dataresult = list(set(dataresult).union(_LISTING_LINK_RE.findall(page)))
                print(len(dataresult))

            with open("androidmarket.txt", "a+") as count_file:
                count_file.write(str(len(dataresult)))
            print(len(dataresult))

            if not os.path.isdir(_ICON_DIR):
                os.makedirs(_ICON_DIR)

            for link in dataresult:
                print(link)
                page = _fetch(link)
                if page is None:
                    continue

                fields = _parse_fields(page)
                if fields is None:
                    print("skipping %s: missing detail field" % link)
                    continue
                for value in fields:
                    print(value)

                _store_app(conn, cursor, fields, page)

                icon = _fetch_icon(page)
                if icon is None:
                    continue
                # Binary mode: the payload is image data, not text.
                with open(_ICON_DIR + "/" + str(num), "wb") as icon_file:
                    icon_file.write(icon)
                num = num + 1
                print(num)
        finally:
            cursor.close()
            conn.close()
    # Run the scraper only when this file is executed as a script.
    if __name__ == "__main__":
        main()
    
    




  • 相关阅读:
    kaggle CTR预估
    基于大规模语料的新词发现算法【转自matix67】
    vim E437: terminal capability "cm" required
    makefile 中的符号替换($@、$^、$<、$?)
    【转】Makefile 中:= ?= += =的区别
    python urljoin问题
    python 写文件刷新缓存
    python Popen卡死问题
    nohup 日志切割
    换行和回车野史
  • 原文地址:https://www.cnblogs.com/javawebsoa/p/3236894.html
Copyright © 2011-2022 走看看