Demo1:urllib使用
#encoding:utf-8
import urllib
import urlparse
def printlist(lines):
    """Print every element of *lines*, one per line."""
    for item in lines:
        print(item)
def demo():
    """Fetch the blog homepage and poke at the response object.

    Uses the Python 2 ``urllib.urlopen`` API.  The commented-out lines are
    alternative ways to inspect the response headers/body.
    """
    s=urllib.urlopen('http://blog.kamidox.com')
    msg=s.info()  # mimetools.Message with the response headers (Python 2)
    #printlist(msg.items())
    #printlist(msg.headers)
    #print(s.getcode())
    #printlist(s.readlines())
    #print(msg.getheader("Content-Type"))
    printlist(dir(msg))  # find the methods of the class
def progress(blk, blk_size, total_size):
    """reporthook callback for ``urllib.urlretrieve``: print download progress.

    blk        -- number of blocks transferred so far
    blk_size   -- size of one block in bytes
    total_size -- total file size in bytes; urlretrieve passes -1 (or 0)
                  when the server does not report a Content-Length
    """
    done = blk * blk_size
    if total_size > 0:
        # The final block usually overshoots the real size; clamp so the
        # display never exceeds total_size / 100%.
        done = min(done, total_size)
        pct = done * 100.0 / total_size
        print("%d/%d - %.02f%%" % (done, total_size, pct))
    else:
        # Unknown total size: the original code divided by total_size here
        # and crashed with ZeroDivisionError.
        print("%d/unknown" % done)
def retrieve():
    """Download the blog homepage to ``index.html``, reporting progress.

    ``urlretrieve`` returns (local filename, response headers); ``progress``
    is invoked after each block is received.
    """
    fname,msg=urllib.urlretrieve('http://blog.kamidox.com','index.html',reporthook=progress)
    #print(fname)
    #printlist(msg.items())
def urlencode():
    """Encode a dict as a query string, then parse it back (Python 2 APIs).

    ``urllib.urlencode`` builds ``k=v&k=v``; ``urlparse.parse_qs`` reverses
    it into a dict of lists.
    """
    params={'score':100,'name':'pachongjichu','comment':'very good'}
    qs=urllib.urlencode(params)
    print(qs)
    print(urlparse.parse_qs(qs))
if __name__ == '__main__':
    # Run only the urlencode demo; swap in demo()/retrieve() to try the others.
    urlencode()
Demo2:抓取图片
#encoding:utf-8
import urllib
# Demo 2: download a placeholder kitten image and save it to disk.
response=urllib.urlopen("http://placekitten.com/g/300/400")
cat_img=response.read()
with open('cat_300_400.jpg','wb')as f:  # the image is binary data, so open in 'wb'
    f.write(cat_img)
print(response.info())
#print(response.read())  # NOTE: the body was already consumed by read() above
Demo3:有道词典翻译
# encoding:utf-8
import urllib
import json
import sys
reload(sys)
sys.setdefaultencoding('utf8')
# Works around: UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 0:
content=raw_input('请输入要翻译的内容:')  # raw_input returns the raw input string (Python 2)
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=http://www.youdao.com/"
data = {}  # fields copied from the "Form Data" section of the browser request
data['i']=content
data['from']='AUTO'
data['to']='AUTO'
data['smartresult']='dict'
data['client']='fanyideskweb'
# NOTE(review): salt/sign look like values captured from one browser session;
# the server may reject them once stale -- verify if requests start failing.
data['salt']='1497500950438'
data['sign']='cd8af15baafdac91e90445ce25f3cea1'
data['doctype']='json'
data['version']='2.1'
data['keyfrom']='fanyi.web'
data['action']='FY_BY_CLICKBUTTON'
data['typoResult']='true'
data=urllib.urlencode(data).encode('utf-8')
response=urllib.urlopen(url,data)  # passing data makes urlopen issue a POST
html=response.read().decode('utf-8')
#print(html)  # the response body is a JSON document
target=json.loads(html)
#type(target)->dict
print("翻译结果:%s"%(target['translateResult'][0][0]['tgt']))
Demo4:代码隐藏和延迟请求
# encoding:utf-8
# Demo 4: hide the client behind a browser User-Agent and throttle requests.
import urllib
import urllib2
import json
import time
import sys
reload(sys)
sys.setdefaultencoding('utf8')
# Works around: UnicodeDecodeError: 'ascii' codec can't decode byte 0xe7 in position 0:
while True:
    content=raw_input('请输入要翻译的内容(输入”q!“退出程序):')  # raw_input returns the raw input string (Python 2)
    if content=='q!':
        break
    url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=http://www.youdao.com/"
    # Build the headers before creating the request object.
    head={}
    head['User-Agent']='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
    data = {}  # fields copied from the "Form Data" section of the browser request
    data['i']=content
    data['from']='AUTO'
    data['to']='AUTO'
    data['smartresult']='dict'
    data['client']='fanyideskweb'
    data['salt']='1497500950438'
    data['sign']='cd8af15baafdac91e90445ce25f3cea1'
    data['doctype']='json'
    data['version']='2.1'
    data['keyfrom']='fanyi.web'
    data['action']='FY_BY_CLICKBUTTON'
    data['typoResult']='true'
    data=urllib.urlencode(data).encode('utf-8')
    # BUG FIX: urllib.urlopen(url, data, head) passed `head` as the Python 2
    # *proxies* argument, so the User-Agent header was never sent.  Build a
    # urllib2.Request, which actually attaches the headers.
    req=urllib2.Request(url,data,head)
    response=urllib2.urlopen(req)
    html=response.read().decode('utf-8')
    #print(html)  # the response body is a JSON document
    target=json.loads(html)
    #type(target)->dict
    print("翻译结果:%s"%(target['translateResult'][0][0]['tgt']))
    time.sleep(3)  # option 1: delay between requests to look less bot-like
也可以在创建 Request 对象之后再调用 req.add_header('User-Agent','Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36')(注意 header 名是带连字符的 'User-Agent')
urllib2.Request 的 add_header 在 Python 2.7 是可用的
但是如果你连续的下载那么就不太行了,不像一个正常人下载那样,每个ip单位时间就会有个数值
第一个方案,是延迟请求
第二个方案,是代理
1.参数是一个字典 {'类型':'代理ip:端口号'}
proxy_support=urllib2.ProxyHandler({})
2.定制、创建一个 opener
opener=urllib2.build_opener(proxy_support)
3.安装 opener
urllib2.install_opener(opener)
或者不安装,直接用 opener.open(url)
# coding: utf-8
# Demo 5: fetch an IP-echo page through a randomly chosen HTTP proxy.
import urllib
import urllib2
import random
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
url='http://www.ip138.com/'
# Candidate proxies; one is picked at random per run.
iplist=['183.203.208.166:8118','111.1.32.28:81','119.6.144.73:81']
proxy_support=urllib2.ProxyHandler({'http': random.choice(iplist)})
opener=urllib2.build_opener(proxy_support)
# BUG FIX: the header name is 'User-Agent' (hyphen); 'User_Agent' is not the
# standard User-Agent header and servers ignore it.
opener.addheaders=[('User-Agent','Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36')]
urllib2.install_opener(opener)
# BUG FIX: urllib.urlopen does NOT use the opener installed via
# urllib2.install_opener, so the proxy and headers were silently unused.
# urllib2.urlopen goes through the installed opener.
response=urllib2.urlopen(url)
html=response.read()
print(html)