爬虫闯关链接:http://www.heibanke.com/lesson/crawler_ex00
知识点:Python正则表达式,Web编程
参考代码:
#!/usr/bin/env python
# encoding: utf-8
import requests
import sys
import re
# Python 2 only: reload(sys) brings back sys.setdefaultencoding (it is removed
# from the sys namespace at interpreter start-up) so the next line can call it.
reload(sys)
# Make UTF-8 the process-wide default codec for implicit str/unicode conversions.
sys.setdefaultencoding("utf-8")
# Module-level string, initially empty.
# NOTE(review): this name shadows the builtin ``str`` for the rest of the module.
str = ""
def attack(session=None):
    """Follow the heibanke crawler level-0 number chain until it ends.

    Starting from the base lesson URL, each fetched page is searched for the
    next number, which is then appended to the base URL for the following
    request. The list of matches is printed after every request. When a page
    contains no number the loop stops and that page's raw body is printed.

    Args:
        session: Optional object exposing ``get(url, timeout=...)`` that
            returns a response with a ``content`` bytes attribute, for
            example a ``requests.Session``. Defaults to the ``requests``
            module itself.

    Returns:
        None. Progress and the final page are written to standard output.
    """
    if session is None:
        session = requests

    base_url = "http://www.heibanke.com/lesson/crawler_ex00/"
    # The digit class needs its backslash: a bare "d+" would only match
    # literal 'd' characters and the chain would stop on the first page.
    # Compiled once, outside the loop.
    next_number = re.compile(u"数字是?(\\d+)")

    # Local loop state; no module-level global (and no shadowing of ``str``).
    suffix = ""
    while True:
        # A timeout keeps a stalled connection from hanging the script forever.
        resp = session.get(base_url + suffix, timeout=30)
        # Decode the body explicitly instead of relying on the interpreter's
        # default encoding.
        found = next_number.findall(resp.content.decode("utf-8"))
        print(found)
        if not found:
            break
        suffix = found[0]
    # Last page fetched: the one that no longer points to a next number.
    print(resp.content)
# Script entry point: run the crawl only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    attack()