'''
GET requests
POST requests
Timeouts and exception handling
Response objects (status code, headers, ...)
Adding headers to a POST request
Proxies
Cookies: adding, saving, reading
---------- urllib.parse ----------
urlparse   - parse a URL
urlunparse - build a URL from parts
urlencode  - encode GET parameters (very useful)
'''
import urllib.request
import urllib.parse

# -------------------------- GET (no data argument)
# response = urllib.request.urlopen('http://www.baidu.com')
# print(response.read().decode('utf-8'))

# ------------------------- POST (pass a data argument)
# data = bytes(urllib.parse.urlencode({'word': 'hello'}), encoding='utf8')
# response = urllib.request.urlopen('http://httpbin.org/post', data=data)
# print(response.read().decode('utf8'))

# ------------------------- timeout
# response = urllib.request.urlopen('http://httpbin.org/get', timeout=1)
# print(response.read())

# import socket
# import urllib.error
# try:
#     response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
# except urllib.error.URLError as e:
#     if isinstance(e.reason, socket.timeout):
#         print('TIME OUT')

# --------------------------- response type
# response = urllib.request.urlopen('http://www.python.org')
# print(type(response))  # <class 'http.client.HTTPResponse'>

# -------------------------- status code and response headers
# response = urllib.request.urlopen('http://www.python.org')
# print(response.status)               # status code
# print(response.getheaders())         # all response headers
# print(response.getheader('Server'))  # the Server header

# read the response body
# response = urllib.request.urlopen('http://www.python.org')
# print(response.read().decode('utf-8'))  # read() returns bytes

# ------------------------------ add headers and send a POST request
# from urllib import parse, request
# url = 'http://httpbin.org/post'
# headers = {
#     'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
#     'Host': 'httpbin.org'
# }
# form = {'name': 'Germey'}  # renamed from `dict`, which shadows the built-in
# data = bytes(parse.urlencode(form), encoding='utf8')
# req = request.Request(url=url, data=data, headers=headers, method='POST')
# response = request.urlopen(req)
# print(response.read().decode('utf-8'))

# ------------------------- proxies
# proxy_handler = urllib.request.ProxyHandler({
#     'http': 'http://127.0.0.1:2222',
#     'https': 'https://127.0.0.1:2211'
# })
# opener = urllib.request.build_opener(proxy_handler)
# response = opener.open('http://www.baidu.com')
# print(response.read())

# ------------------------- cookies
# import http.cookiejar, urllib.request
# cookie = http.cookiejar.CookieJar()
# handler = urllib.request.HTTPCookieProcessor(cookie)
# opener = urllib.request.build_opener(handler)
# response = opener.open('http://www.baidu.com')
# for item in cookie:
#     print(item.name + '=' + item.value)  # print each cookie as key=value

############ save cookies to a txt file
# import http.cookiejar, urllib.request
# filename = 'cookie.txt'
# cookie = http.cookiejar.LWPCookieJar(filename)
# handler = urllib.request.HTTPCookieProcessor(cookie)
# opener = urllib.request.build_opener(handler)
# response = opener.open('http://www.baidu.com')
# cookie.save(ignore_discard=True, ignore_expires=True)

############## load cookies from a file
# import http.cookiejar, urllib.request
# cookie = http.cookiejar.LWPCookieJar()
# cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)
# handler = urllib.request.HTTPCookieProcessor(cookie)
# opener = urllib.request.build_opener(handler)
# response = opener.open('http://www.baidu.com')  # fixed typo: 'ww.baidu.com'
# print(response.read().decode('utf-8'))

############# exception handling
# from urllib import request, error
# try:
#     response = request.urlopen('http://wwwwwwww.com')
# except error.URLError as e:
#     print(e.reason, 'xxx')
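# The snippets above handle headers, timeouts and URLError each on their own;
# the sketch below is one way to combine them into a single helper. The name
# `fetch` is made up for illustration; it reuses the httpbin.org test URL.
# import socket
# from urllib import request, error
# def fetch(url, timeout=5):
#     """Return the decoded body, or None on a timeout / URL error."""
#     headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
#     req = request.Request(url, headers=headers)
#     try:
#         response = request.urlopen(req, timeout=timeout)
#     except error.URLError as e:
#         if isinstance(e.reason, socket.timeout):
#             print('TIME OUT')
#         else:
#             print(e.reason)
#         return None
#     return response.read().decode('utf-8')
# print(fetch('http://httpbin.org/get'))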
############# exception handling 2: catch HTTPError before URLError
# from urllib import request, error
# try:
#     response = request.urlopen('http://www.baidu.com/aa/aass')
# except error.HTTPError as e:
#     print(e.reason, e.code, e.headers, sep=' ')
# except error.URLError as e:
#     print(e.reason)
# else:
#     print('Request Successfully')

########### exception handling 3: inspect e.reason
# import socket, urllib.request, urllib.error
# try:
#     response = urllib.request.urlopen('https://www.baidu.com', timeout=0.01)
# except urllib.error.URLError as e:
#     print(type(e.reason))
#     if isinstance(e.reason, socket.timeout):  # check the concrete exception type
#         print('TIME OUT')

# --------------------- URL parsing
from urllib.parse import urlparse

# a single argument
# result = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
# print(type(result), result)

# scheme= supplies a default protocol: used only when the URL itself has none,
# otherwise the URL's own scheme wins
# result = urlparse(scheme='https', url='http://www.baidu.com/index.html;user?id=5#comment')
# print(type(result), result)

# allow_fragments=False is rarely used: the fragment (#xxx) is merged into the
# query (or, if there is no query, further forward into the path)
# result = urlparse(allow_fragments=False, url='http://www.baidu.com/index.html;user?id=5#comment')
# print(result)

# ---------------------------------- urlunparse: build a URL from parts
# from urllib.parse import urlunparse
# data = ['http', 'www.baidu.com', 'index.html', 'user', 'a=1', 'comment']
# print(urlunparse(data))

# ---------------------------- urljoin
from urllib.parse import urljoin

# join a base URL and a relative path
# print(urljoin('http://www.baidu.com', 'Faq.html'))
# the second argument wins when it is an absolute URL
# print(urljoin('http://www.baidu.com', 'https://www.baidu.com/aaa'))
# join a base URL and a query string
# print(urljoin('http://www.baidu.com', '?a=1'))

################ urlencode: encode GET request parameters
# from urllib.parse import urlencode
# params = {
#     'name': 'kaige',
#     'age': '22'
# }
# base_url = 'http://www.baidu.com?'
# url = base_url + urlencode(params)
# print(url)
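# A sketch tying the parse helpers together: take a URL apart with urlparse,
# rewrite its query with parse_qs + urlencode, and rebuild it with geturl().
# The URL and the added `page` parameter are made up for illustration.
# from urllib.parse import urlparse, parse_qs, urlencode
# url = 'http://www.baidu.com/index.html?id=5'
# parts = urlparse(url)
# query = parse_qs(parts.query)  # {'id': ['5']}
# query['page'] = '2'            # add or override a parameter
# new_url = parts._replace(query=urlencode(query, doseq=True)).geturl()
# print(new_url)                 # http://www.baidu.com/index.html?id=5&page=2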