#-*-coding:utf-8-*-
import urllib
import urllib2
import cookielib
##urllib
# Default page used by the examples in this file.
url = "http://www.qq.com"

# Request headers carrying a desktop Chrome User-Agent string.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/57.0.2987.133 Safari/537.36"
    ),
}
def urllib_study(url, savepath='D:\\pic\\qq.html'):
    """Walk through the basic ``urllib`` response API for *url*.

    Prints the GBK-decoded page body, the HTTP status code, the request
    URL and the response headers, then downloads the same URL to
    *savepath*.

    Args:
        url: Address to fetch.
        savepath: File the download step writes to.  The original code
            hard-coded a directory literal whose trailing backslash escaped
            the closing quote (a syntax error); ``urlretrieve`` needs a
            file name, so a file inside that directory is the default.
    """
    urldata = urllib.urlopen(url)
    try:
        print(urldata.read().decode('gbk'))  # page content
        print(urldata.getcode())  # HTTP status code
        print(urldata.geturl())  # request URL
        # Header info; urldata.info().getparam('charset') returns the
        # charset declared in the headers.
        print(urldata.info())
    finally:
        # Release the connection even if decoding/printing fails.
        urldata.close()
    urllib.urlretrieve(url, savepath)  # download to a local file
def urlretrieve_study(url, savepath):
    """Download *url* to *savepath*, printing the progress after each block.

    Args:
        url: Address to download.
        savepath: Local file path the content is written to.
    """
    def callback(a, b, c):
        # urlretrieve calls the hook with: a = blocks transferred so far,
        # b = block size in bytes, c = total size of the file.
        if c <= 0:
            # The total is -1 when the server reports no size and may be 0
            # for an empty body; a percentage cannot be computed, so report
            # the transferred byte count instead of dividing by it.
            print('%d bytes' % (a * b))
            return
        # The last block usually overshoots the real size, so cap at 100.
        down_progress = min(100.0 * a * b / c, 100)
        print('%.2f%%' % down_progress)

    urllib.urlretrieve(url, savepath, callback)
#urlretrieve_study(url,'D:\pics\qq.html')
##urllib2
def urllib2_study(url):
    """Demonstrate ``urllib2.urlopen`` and a cookie-aware opener on *url*.

    First opens *url* with ``urllib2.urlopen`` and prints the attributes of
    the response object, then fetches it again through an opener that keeps
    cookies in a ``CookieJar`` and prints the GBK-decoded body.

    Args:
        url: Address to fetch.
    """
    urldata = urllib2.urlopen(url)
    try:
        # The response offers urldata.read().decode('gbk'),
        # urldata.getcode(), urldata.info() and urldata.geturl().
        print(dir(urldata))
    finally:
        urldata.close()

    # Note kept from the original comments: custom headers (for example a
    # User-Agent picked with random.choice from a list) can be sent by
    # building req = urllib2.Request(url), calling
    # req.add_header('User-Agent', ...) and passing req to urllib2.urlopen.

    cookie = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    # urllib2.urlopen itself ends up calling an opener's open().
    data = opener.open(url)
    try:
        print(data.read().decode('gbk'))
    finally:
        data.close()
def urllib2_post(url):
    """Send a sample form to *url* with the module-level ``header``.

    The form fields are URL-encoded and passed as the request data.

    Args:
        url: Address the form is submitted to.

    Returns:
        The raw response body.  (The original read the body into a local
        and discarded it; returning it keeps existing callers working while
        making the result usable.)
    """
    values = {'name': 'howhy', 'age': 32}
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data, header)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Close the connection whether or not the read succeeded.
        response.close()
def handler():
    """Fetch http://www.baidu.com/ through an opener built on HTTPHandler.

    Shows the general handler pattern: create a handler, build an opener
    from it, then open a Request through that opener.  The body is printed.
    """
    htp = urllib2.HTTPHandler()
    opener = urllib2.build_opener(htp)
    req = urllib2.Request("http://www.baidu.com/")
    print(opener.open(req).read())


def proxy():
    """Fetch http://www.baidu.com/ through a fixed HTTP proxy.

    Uses a ProxyHandler mapping the ``http`` scheme to a hard-coded proxy
    address and prints the response body.
    """
    hand = urllib2.ProxyHandler({"http": "219.141.153.41:80"})
    opener = urllib2.build_opener(hand)
    req = urllib2.Request("http://www.baidu.com/")
    print(opener.open(req).read())
def authhandler():
    """Fetch a page protected by HTTP basic authentication and print it.

    Registers a username/password with a default-realm password manager,
    wraps it in an HTTPBasicAuthHandler and opens the request through an
    opener built from that handler.  The URI and credentials are the
    literal values from the original code -- presumably placeholders to be
    replaced with real ones.
    """
    htppwd = urllib2.HTTPPasswordMgrWithDefaultRealm()
    htppwd.add_password(None, "auth web", "username", "password")
    htp = urllib2.HTTPBasicAuthHandler(htppwd)
    # build_opener accepts several handlers at once if more are needed.
    opener = urllib2.build_opener(htp)
    # Every entry must be a (name, value) pair; the original list also held
    # an empty tuple, which cannot be unpacked when the headers are applied.
    opener.addheaders = [("User-Agent", "dsffsdfdsfd")]
    req = urllib2.Request("http://auth web")
    print(opener.open(req).read())