python如何访问互联网 url+lib==urllib
url protocol://hostname[:port]/path/[;parameters][?query][#fragment]
protocol:http https ftp file ed2k 服务器域名/ip 资源地址
urllib(包,含四个模块:urllib.request urllib.error urllib.parse urllib.robotparser)
urllib.request.urlopen(url,data,timeout,capath...)
# Fetch a web page and print its source as text.
import urllib.request

# The context manager closes the HTTP connection once the body has been read.
with urllib.request.urlopen('http://www.baidu.com') as response:
    html = response.read()  # raw bytes (binary)
html = html.decode("utf-8")  # bytes -> str
print(html)
# Download a cat picture from placekitten.com and save it to disk.
import urllib.request

# urlopen() needs an absolute URL: without the "http://" scheme it raises
# ValueError (unknown url type).
# Equivalent two-step form -- build a Request object first, then open it:
#   req = urllib.request.Request('http://placekitten.com/g/500/600')
#   response = urllib.request.urlopen(req)
with urllib.request.urlopen('http://placekitten.com/g/500/600') as response:
    # Inspection helpers on the response object (the values are only shown
    # when typed in an interactive session).
    response.geturl()   # URL of the resource that was retrieved
    response.info()     # response headers
    response.getcode()  # HTTP status code
    cat_img = response.read()

# Image data is binary, so the file is opened in 'wb' mode.
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)
#有道翻译
#Network: Method(GET / POST), Preview
#Headers: Remote Address, Request URL, Request Method
#Request Headers(客户端发送请求的 header;服务器通过 User-Agent 判断是否非人类访问)
#Form Data: POST 提交的表单数据
#隐藏: 服务器检查 Headers > User-Agent
# Youdao translation client.
#
# A server may limit how often one IP address can call it.  Two ways to cope:
#   1. lower the request frequency (the sleep at the end of every round), or
#   2. send the requests through a proxy (see the notes inside main()).
import json
import time
import urllib.parse
import urllib.request


def build_form_data(content):
    """Return the POST body that asks the service to translate *content*.

    The form fields are URL-encoded and then encoded to UTF-8 bytes, which is
    the form urllib expects for the ``data`` argument of a request.
    """
    data = {
        'type': 'AUTO',
        # The text to translate travels in the 'i' field.
        'i': content,
        'doctype': 'json',
        'xmlVersion': '1.6',
        # NOTE(review): 'fanyi.web' is assumed to be the client identifier the
        # web page sends -- confirm against the Form Data captured in the
        # browser's developer tools.
        'keyfrom': 'fanyi.web',
        'ue': 'UTF-8',
        'typoResult': 'true',
    }
    return urllib.parse.urlencode(data).encode('utf-8')


def extract_translation(html):
    """Return the translated text contained in the JSON reply *html*.

    JSON is a lightweight data-interchange format; json.loads() turns the
    reply into a dict.  Raises KeyError/IndexError if the reply does not have
    the ``translateResult[0][0]['tgt']`` layout.
    """
    target = json.loads(html)
    return target['translateResult'][0][0]['tgt']


def main():
    """Prompt for text in a loop and print its translation until 'q!'."""
    url = "http://fanyi.youdao.com..."
    while True:
        content = input("请输入需要翻译的内容(输入'q!'退出程序)")
        if content == 'q!':
            break

        data = build_form_data(content)

        # Setting the User-Agent, alternative 1: pass a headers dict when the
        # Request object is created.
        #   head = {}
        #   head['User-Agent'] = 'Mozilla/5.0...'
        #   req = urllib.request.Request(url, data, head)
        # Alternative 2 (used here): add the header after creating the Request.
        req = urllib.request.Request(url, data)
        req.add_header('User-agent', 'Mozilla/5.0')

        # Using a proxy:
        #   1. The argument is a dict {'scheme': 'proxy_ip:port'}:
        #        proxy_support = urllib.request.ProxyHandler({})
        #   2. Build a customised opener:
        #        opener = urllib.request.build_opener(proxy_support)
        #   3a. Install the opener globally:
        #        urllib.request.install_opener(opener)
        #   3b. Or call the opener directly:
        #        opener.open(url)

        # A Request object only describes the request; it has to be opened
        # to obtain a response that can be read.
        with urllib.request.urlopen(req) as response:
            html = response.read().decode('utf-8')

        print("翻译结果:" + extract_translation(html))

        # Wait between rounds to keep the request frequency low.
        time.sleep(5)


if __name__ == "__main__":
    main()
# Proxy test: fetch a "what is my IP" page through a randomly chosen proxy.
import random
import urllib.request

url = 'http://www.whatismyip.com.tw'
# Candidate proxies, each written as 'proxy_ip:port' (placeholders here).
ip_list = ['', '', '']

# 1. The handler takes a dict {'scheme': 'proxy_ip:port'}.
proxy_support = urllib.request.ProxyHandler({'http': random.choice(ip_list)})
# 2. Build a customised opener that uses the proxy handler.
opener = urllib.request.build_opener(proxy_support)
# addheaders is a list of (name, value) tuples sent with every request.
opener.addheaders = [('User-Agent', 'Mozzilla')]
# 3. Install the opener so that plain urlopen() calls go through it.
urllib.request.install_opener(opener)

response = urllib.request.urlopen(url)
html = response.read().decode('utf-8')