zoukankan html css js c++ java

Python 爬虫：抓取网页并保存图片

选取的网站是桌面壁纸网站的汽车主题：

下面代码中的两个 print 语句（已注释）可在调试期间打开：

#print tag
#print attrs

#!/usr/bin/env python
import re
import urllib2
import HTMLParser
# Site root; the parser prepends it to the link values it pulls from tags.
base = 'http://desk.zol.com.cn'
# Directory prefix that download() joins with the image file name.
path = "/home/mk/cars/"
# Last 'pic' link followed from the index page; 'next' links equal to it
# are not followed again.
star = ""
def get_url(html):
	"""Fetch a page and run it through a ``parse(False)`` parser.

	Despite its name, ``html`` is the URL to request, not markup. The
	response body is read in full and fed to a parser created with
	``Index`` set to False.
	"""
	page_source = urllib2.urlopen(urllib2.Request(html)).read()
	page_parser = parse(False)
	page_parser.feed(page_source)
def download(url):
	content = urllib2.urlopen(url).read()
	format = '[0-9]*.jpg';
	res = re.search(format,url);
	print 'downloading:',res.group()
	filename = path+res.group()
	f = open(filename,'w+')
	f.write(content)
	f.close()	 
class parse(HTMLParser.HTMLParser):
	def __init__(self,Index):
		self.Index = Index;
		HTMLParser.HTMLParser.__init__(self)
	def handle_starttag(self,tag,attrs):
		#print tag
		#print attrs
		if(self.Index):
			if not cmp(tag,'a'):
				if(len(attrs) == 4):
					if(attrs[0] ==('class','pic')):
						#print tag
						#print attrs
						new = base+attrs[1][1]
						print 'found a link:',new
						global star
						star = new
						get_url(new)
		else:
			if not cmp(tag,'img'):
				if(attrs[0] == ('id','bigImg')):
					#print tag
					#print attrs
					Image_url = attrs[1][1]
					print 'found a picture:',Image_url
					download(Image_url)
			if not cmp(tag,'a'):
				if (len(attrs) == 4):
					if (attrs[1] == ('class','next')):
						#print tag
						#print attrs
						next = base + attrs[2][1]
						print 'found a link:',next
						if (star != next):
							get_url(next)
# Script entry: fetch the index page and parse it in index mode, which
# follows each matching link through get_url().
Index_url = 'http://desk.zol.com.cn/qiche/'
Parser_index = parse(True)
con = urllib2.urlopen(Index_url).read()
Parser_index.feed(con)

唯一的缺点是，在网站上漂亮的壁纸桌面壁纸。

。。

查看全文

相关阅读:
C++ 声明、定义、初始化、赋值
 skynet源码赏析
 python基础6函数柒哥
 Python基础4数据类型详解下柒哥
 Python基础1变量柒哥
 Python基础2数据类型柒哥
 Python基础3数据类型详解上柒哥
 Python基础5条件分支与循环柒哥
 PHP面试（A02）
Envoy 配置

原文地址：https://www.cnblogs.com/lcchuguo/p/4741489.html