#-*- coding:utf-8 -*-
#from __future__ import unicode_liter
import urllib,urllib2
import re,sys,os,time
# HTTP headers attached to the page request built in getstr(). The User-Agent
# string identifies the client as desktop Chrome 55 on Windows 10.
# NOTE(review): the Referer names jandan.net although getstr() requests
# qiushibaike.com -- confirm this is intentional.
headers = {
    'Referer': 'http://jandan.net/',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    ),
}
# Matches one post in the listing HTML. Group 1 is the text of the first
# <span> after the content <div>; groups 2 and 3 are the digit runs of the
# next two ``number">...</i`` elements. re.S lets ``.`` span line breaks,
# which is required because each post's markup covers several lines.
_POST_RE = re.compile(
    r'''<div class="content">.*?<span>(.*?)</span.*?number">([\d]*)</i.*?number">([\d]*)</i''',
    re.S
)


def getstr(n=1):
    """Download listing page ``n`` and extract the posts it contains.

    Args:
        n: Page number interpolated into the listing URL (defaults to 1).

    Returns:
        A list of 3-tuples of strings ``(text, first_number, second_number)``,
        one per regex match, in document order. ``text`` is the raw inner
        HTML of the post's <span> (it may still contain ``<br/>`` tags).
        The list is empty when nothing on the page matches.

    Raises:
        Whatever ``urllib2.urlopen`` raises on network or HTTP failure.
    """
    req = urllib2.Request(
        'https://www.qiushibaike.com/text/page/%s' % n, headers=headers)
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        # Always release the connection, even if reading the body fails.
        response.close()
    return _POST_RE.findall(html)
# Interactive driver: ask how many listing pages to fetch, then print every
# post found on pages 1..N followed by its two counters, and finally the
# total number of posts printed.
page_count = int(raw_input('你想获取多少页内容:'))
total = 0  # running count of posts printed across all pages
for page in xrange(1, page_count + 1):
    print('获取第%s页***********************************' % page)
    # NOTE: a per-page time.sleep(2) pause was present here but is disabled.
    for text, funny_count, comment_count in getstr(page):
        total += 1
        print('\n获取第%s条内容' % total)
        # Turn HTML line breaks into spaces and trim surrounding whitespace.
        print(text.replace('<br/>', ' ').strip())
        print('好笑数:' + funny_count)
        print('评论数:' + comment_count)
print('共获取%s条' % total)