import urllib2 import urllib import re import sqlite3 print 'Create table test' db = sqlite3.connect('test.db') db.row_factory = sqlite3.Row db.text_factory = str db.execute('drop table if exists test') db.execute('create table test (title text, name text)') for j in range(14): try: f = urllib2.urlopen('http://www.oschina.net/code/list/7/python?show=time&p=%d' % j).read() ll = re.findall('http://www.oschina.net/code/snippet(.*?)"',f) for i in ll[:]: #urllib.urlretrieve('http://www.oschina.net/code/snippet%s' % i,"c:\\" + "snippet%s" % i+".html") f = urllib2.urlopen('http://www.oschina.net/code/snippet%s' % i).read() s1 = re.findall('<[H|h]1><[A|a].+>(.*?)</[A|a]></[H|h]1>',f) re_co = re.compile('''<div class='code_pieces code_module'>.*">(.*?)</pre>.*<div id='related_codes' class='CodeList code_module'>''',re.S) s2 = re_co.findall(f) #print s2[0] db.execute('insert into test (title, name ) values (?, ?)', (s1[0], s2[0])) db.commit() print s1[0] #break #print i #print "OK" #print f #print "Hello" except: print j db.close()