zoukankan      html  css  js  c++  java
  • 下载oschina.net源代码

    import urllib2
    import urllib
    import re
    import sqlite3
    
    
    # Scrape the Python code-snippet listing on oschina.net and store each
    # snippet's title and source text in the `test` table of test.db.
    #
    # Every print below uses the single-argument print(...) form, which the
    # Python 2 print statement this script relies on treats as a plain
    # parenthesised expression.

    def _fetch(url):
        """Return the body of `url`, closing the HTTP response afterwards."""
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    # Compiled once up front instead of on every snippet; the pattern text is
    # unchanged so the same links, titles and code bodies are matched.
    _snippet_link_re = re.compile('http://www.oschina.net/code/snippet(.*?)"')
    _title_re = re.compile('<[H|h]1><[A|a].+>(.*?)</[A|a]></[H|h]1>')
    _code_re = re.compile('''<div class='code_pieces code_module'>.*">(.*?)</pre>.*<div id='related_codes' class='CodeList code_module'>''', re.S)

    print('Create table test')
    db = sqlite3.connect('test.db')
    try:
        db.row_factory = sqlite3.Row
        db.text_factory = str
        db.execute('drop table if exists test')
        db.execute('create table test (title text, name text)')

        for page_no in range(14):
            # A listing page that cannot be fetched is reported and skipped.
            # `Exception` (not a bare except) keeps the crawl best-effort
            # while still letting Ctrl-C / SystemExit stop the script.
            try:
                listing_html = _fetch('http://www.oschina.net/code/list/7/python?show=time&p=%d' % page_no)
            except Exception as exc:
                print('page %d failed: %r' % (page_no, exc))
                continue

            for suffix in _snippet_link_re.findall(listing_html):
                # Errors are handled per snippet so that one bad snippet no
                # longer abandons the rest of its listing page.
                try:
                    snippet_html = _fetch('http://www.oschina.net/code/snippet%s' % suffix)
                    titles = _title_re.findall(snippet_html)
                    bodies = _code_re.findall(snippet_html)
                    if not titles or not bodies:
                        # The page layout did not match; nothing to store.
                        print('snippet%s on page %d skipped: title or code not found' % (suffix, page_no))
                        continue
                    db.execute('insert into test (title, name ) values (?, ?)', (titles[0], bodies[0]))
                    # Commit per row so progress survives an interrupted run.
                    db.commit()
                except Exception as exc:
                    print('snippet%s on page %d failed: %r' % (suffix, page_no, exc))
                    continue
                print(titles[0])
    finally:
        # Always release the database handle, even on an unexpected error.
        db.close()
  • 相关阅读:
    axis
    LRU
    apk 反编译
    android 设置 button 不同状态的图片
    resin
    scrum 项目管理
    android 国外广告平台
    JNI
    java 通信
    google网站分析
  • 原文地址:https://www.cnblogs.com/pythonschool/p/2744436.html
Copyright © 2011-2022 走看看