zoukankan      html  css  js  c++  java
  • urllib urllib2

    #-*-coding:utf-8-*-
    import urllib
    import urllib2
    import cookielib
    ##urllib
    # Page fetched by the examples in this listing.
    url = "http://www.qq.com"
    # Request headers carrying a desktop-Chrome User-Agent string.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    }

    def urllib_study(url):
        """Open *url* with ``urllib.urlopen`` and print what the response exposes.

        Prints, in order: the body decoded as GBK, the HTTP status code, the
        URL of the response and the response headers.  Afterwards the same
        URL is downloaded to a local file with ``urllib.urlretrieve``.

        :param url: address of the page to fetch.
        """
        urldata = urllib.urlopen(url)
        try:
            print(urldata.read().decode('gbk'))  # page content
            print(urldata.getcode())  # HTTP status code
            print(urldata.geturl())  # request URL
            # Header info; urldata.info().getparam('charset') returns the
            # encoding declared in the headers.
            print(urldata.info())
        finally:
            # Release the connection even if decoding or printing fails.
            urldata.close()
        # Download the page to disk.  urlretrieve needs a *file* path, and the
        # path is a raw string so the backslashes are not read as escapes (the
        # previous 'D:\pic\' literal escaped its own closing quote).
        # NOTE(review): the file name qq.html is an assumed placeholder.
        urllib.urlretrieve(url, r'D:\pic\qq.html')
    def urlretrieve_study(url, savepath):
        """Download *url* to *savepath*, printing the progress as a percentage.

        :param url: address of the resource to download.
        :param savepath: local file path the download is written to.
        """
        def callback(a, b, c):
            """Progress hook: *a* blocks of *b* bytes received out of *c* bytes."""
            if c <= 0:
                # urlretrieve reports -1 when the server sends no size, and an
                # empty file has size 0; no meaningful percentage exists.
                return
            # The last block is usually only partly filled, so cap at 100.
            down_progess = min(100.0 * a * b / c, 100)
            print('%.2f%%' % down_progess)

        urllib.urlretrieve(url, savepath, callback)

    #urlretrieve_study(url,'D:\pics\qq.html')
    ##urllib2
    def urllib2_study(url):
        """Open *url* with ``urllib2.urlopen`` and return the response object.

        The response is returned (instead of being dropped) so the caller can
        inspect it and close it when done.

        :param url: address of the page to open.
        :return: the file-like response produced by ``urllib2.urlopen``.
        """
        urldata = urllib2.urlopen(url)
        return urldata
     
    #-*-coding:utf-8-*-
    import urllib2
    import random
    # NOTE(review): empty placeholder; presumably set to a real address before use.
    url = ""
    # Pool of User-Agent strings that get_content picks from at random.
    useragent = [
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    ]
    def get_content(url, useragent, referer=None):
        """Fetch *url* with a randomly chosen User-Agent and return the raw body.

        :param url: address to request.
        :param useragent: sequence of User-Agent strings; one is picked at
            random for this request.  When it is empty no User-Agent header
            is added.
        :param referer: optional value for the ``Referer`` header.
        :return: the response body as returned by ``read()``.
        """
        req = urllib2.Request(url)
        if useragent:
            random_header = random.choice(useragent)
            req.add_header('User-Agent', random_header)
        # The former add_header('Host',), add_header("Refer",) and
        # add_header('GET',) calls passed a single argument and raised
        # TypeError.  "GET" is the request method rather than a header, and
        # the header is spelled "Referer"; no Host header is set here.
        if referer is not None:
            req.add_header('Referer', referer)
        response = urllib2.urlopen(req)
        try:
            ret = response.read()
        finally:
            # Always release the connection.
            response.close()
        return ret
    

      

    # NOTE(review): these statements look like the continuation of
    # urllib2_study (they use its `urldata`), separated from it by a second
    # script; confirm the intended placement and indentation.
    # List the attributes available on the response object.
    print(dir(urldata))##urldata.read.decode('gbk') urldata.getcode() urldata.info() urldata.geturl()
    # Build an opener whose HTTPCookieProcessor is backed by a CookieJar.
    cookie=cookielib.CookieJar()
    opener=urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    # NOTE(review): `url` was rebound to "" a few lines above; confirm which
    # address is meant here.
    data=opener.open(url)##urllib2.urlopen actually calls opener.open as well
    # Print the body decoded as GBK.
    print(data.read().decode('gbk'))
    def urllib2_post(url):
        """Send a small url-encoded form to *url* and return the response body.

        The form fields are fixed (``name=howhy``, ``age=32``) and the request
        carries the module-level ``header`` dictionary as its headers.

        :param url: address the form is submitted to.
        :return: the response body as returned by ``read()``.
        """
        values = {'name': 'howhy', 'age': 32}
        data = urllib.urlencode(values)
        # Supplying a data payload makes urllib2 issue a POST instead of a GET.
        req = urllib2.Request(url, data, header)
        response = urllib2.urlopen(req)
        try:
            the_page = response.read()
        finally:
            # Always release the connection.
            response.close()
        # Previously the body was read and then discarded.
        return the_page
    def handler():  # generic handler
        """Fetch http://www.baidu.com/ through an opener built from a plain
        HTTPHandler and print the response body."""
        request = urllib2.Request("http://www.baidu.com/")
        http_opener = urllib2.build_opener(urllib2.HTTPHandler())
        body = http_opener.open(request).read()
        print(body)
    
    def proxy():  # proxy handler
        """Fetch http://www.baidu.com/ through an opener built from a
        ProxyHandler and print the response body."""
        proxies = {"http": "219.141.153.41:80"}
        request = urllib2.Request("http://www.baidu.com/")
        proxy_opener = urllib2.build_opener(urllib2.ProxyHandler(proxies))
        body = proxy_opener.open(request).read()
        print(body)
    def authhandler():  # basic-auth handler
        """Request a page through an opener that performs HTTP basic auth.

        Registers one username/password pair with a default-realm password
        manager, builds an opener from the resulting HTTPBasicAuthHandler,
        sets a custom User-Agent on the opener and prints the response body.

        NOTE(review): "auth web", "username" and "password" look like
        placeholders to be replaced with real values; confirm before use.
        """
        htppwd = urllib2.HTTPPasswordMgrWithDefaultRealm()
        htppwd.add_password(None, "auth web", "username", "password")
        htp = urllib2.HTTPBasicAuthHandler(htppwd)
        opener = urllib2.build_opener(htp)  # several handlers can be passed here
        # Each entry must be a (name, value) pair; the stray empty tuple that
        # used to follow this entry could not be unpacked into one.
        opener.addheaders = [("User-Agent", "dsffsdfdsfd")]
        req = urllib2.Request("http://auth web")
        print(opener.open(req).read())
  • 相关阅读:
    智器SmartQ T7实体店试用体验
    BI笔记之SSAS库Process的几种方案
    PowerTip of the Day from powershell.com上周汇总(八)
    PowerTip of the Day2010071420100716 summary
    PowerTip of the Day from powershell.com上周汇总(十)
    PowerTip of the Day from powershell.com上周汇总(六)
    重新整理Cellset转Datatable
    自动加密web.config配置节批处理
    与DotNet数据对象结合的自定义数据对象设计 (二) 数据集合与DataTable
    在VS2003中以ClassLibrary工程的方式管理Web工程.
  • 原文地址:https://www.cnblogs.com/howhy/p/7027681.html
Copyright © 2011-2022 走看看