zoukankan      html  css  js  c++  java
  • urllib urllib2

    #-*-coding:utf-8-*-
    import urllib
    import urllib2
    import cookielib
    ##urllib
    # Default page requested by the urllib / urllib2 examples in this file.
    url = "http://www.qq.com"

    # Request headers that present the examples as a desktop Chrome browser.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    }

    def urllib_study(url, savepath='D:\\pic\\qq.html'):
        """Open ``url`` with ``urllib`` and print what the response object exposes.

        Prints the decoded page body, the HTTP status code, the request URL and
        the response headers, then downloads the page to ``savepath``.

        :param url: address to open.
        :param savepath: target *file* for the download. ``urlretrieve`` writes
            to a file name, so a bare directory such as ``D:\\pic\\`` cannot be
            used here.
        """
        urldata = urllib.urlopen(url)
        try:
            # Prefer the charset announced in the response headers and fall
            # back to GBK, which is what this example originally hard-coded.
            charset = urldata.info().getparam('charset') or 'gbk'
            print(urldata.read().decode(charset))  # page content
            print(urldata.getcode())  # HTTP status code
            print(urldata.geturl())  # request URL
            print(urldata.info())  # header info
        finally:
            urldata.close()
        urllib.urlretrieve(url, savepath)  # download the page to a file
    def urlretrieve_study(url, savepath):
        """Download ``url`` to ``savepath`` and print the progress as a percentage.

        :param url: address to download.
        :param savepath: file name the download is written to.
        """
        def callback(a, b, c):
            """Report hook: ``a`` blocks of ``b`` bytes done, ``c`` bytes in total."""
            if c <= 0:
                # The total size is unknown (reported as -1) or the file is
                # empty; a percentage cannot be computed, so report nothing.
                return
            # The last block is usually only partly filled, so cap at 100.
            down_progess = min(100.0 * a * b / c, 100.0)
            print('%.2f%%' % down_progess)

        urllib.urlretrieve(url, savepath, callback)

    #urlretrieve_study(url,'D:\pics\qq.html')
    ##urllib2
    def urllib2_study(url):
        """Open ``url`` with ``urllib2``, directly and through a cookie-aware opener.

        First prints the attributes of the response returned by
        ``urllib2.urlopen``; then builds an opener with an
        ``HTTPCookieProcessor`` and prints the page fetched through it, decoded
        as GBK.

        :param url: address to open.
        """
        urldata = urllib2.urlopen(url)
        try:
            # The response offers read(), getcode(), info() and geturl().
            print(dir(urldata))
        finally:
            urldata.close()

        cookie = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        # urllib2.urlopen itself ends up calling opener.open as well.
        data = opener.open(url)
        try:
            print(data.read().decode('gbk'))
        finally:
            data.close()


    # ---- second example: fetch a page with a randomly chosen User-Agent ----
    #-*-coding:utf-8-*-
    import urllib2
    import random
    url = ""
    useragent = [
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    ]


    def get_content(url, useragent, referer=None):
        """Return the body of ``url``, requested with a randomly chosen User-Agent.

        :param url: address to fetch.
        :param useragent: sequence of User-Agent strings to choose from; when it
            is empty no User-Agent header is set explicitly.
        :param referer: optional value for the ``Referer`` header.
        :returns: the response body as returned by ``read()``.
        """
        req = urllib2.Request(url)
        if useragent:
            req.add_header('User-Agent', random.choice(useragent))
        # add_header() needs both a name and a value. The value-less 'Host' and
        # 'GET' calls are dropped: urllib2 derives Host from the URL, and GET is
        # the request method rather than a header.
        if referer is not None:
            req.add_header('Referer', referer)
        response = urllib2.urlopen(req)
        try:
            return response.read()
        finally:
            response.close()
    def urllib2_post(url):
        """POST a small url-encoded form to ``url`` and return the response body.

        The module-level ``header`` dict is sent as the request headers.

        :param url: address the form is posted to.
        :returns: the response body as returned by ``read()``.
        """
        values = {'name': 'howhy', 'age': 32}
        data = urllib.urlencode(values)
        # Passing a data argument is what makes urllib2 issue a POST.
        req = urllib2.Request(url, data, header)
        response = urllib2.urlopen(req)
        try:
            the_page = response.read()
        finally:
            response.close()
        return the_page
    def handler():  # plain handler
        """Print the Baidu home page fetched through an opener built from an HTTPHandler."""
        target = urllib2.Request("http://www.baidu.com/")
        http_opener = urllib2.build_opener(urllib2.HTTPHandler())
        page = http_opener.open(target)
        print(page.read())
    
    def proxy():  # proxy handler
        """Print the Baidu home page fetched through an opener built from a ProxyHandler."""
        proxy_map = {"http": "219.141.153.41:80"}
        target = urllib2.Request("http://www.baidu.com/")
        proxied_opener = urllib2.build_opener(urllib2.ProxyHandler(proxy_map))
        page = proxied_opener.open(target)
        print(page.read())
    def authhandler():  # basic-auth handler
        """Print a page fetched through an opener that performs HTTP basic auth.

        NOTE(review): "auth web", "username" and "password" look like
        placeholders from the tutorial; replace them with a real host and
        credentials before running.
        """
        htppwd = urllib2.HTTPPasswordMgrWithDefaultRealm()
        htppwd.add_password(None, "auth web", "username", "password")
        htp = urllib2.HTTPBasicAuthHandler(htppwd)
        opener = urllib2.build_opener(htp)  # several handlers may be passed here
        # Every addheaders entry must be a (name, value) pair; the stray empty
        # tuple from the original list is dropped because it cannot be unpacked.
        opener.addheaders = [("User-Agent", "dsffsdfdsfd")]
        req = urllib2.Request("http://auth web")
        print(opener.open(req).read())
  • 相关阅读:
    win7 IIS配置及设置
    JS高效关键字搜索转
    CLR读书笔记第四章 类型基础
    SQL语句执行顺序
    JS常用方法转
    js 设置url参数转
    随机生成 字体大小转
    jquery获得select option的值 和对select option的操作转自(紫寒)
    前端开发者基本要求转
    2 Request对象的一些属性等
  • 原文地址:https://www.cnblogs.com/howhy/p/7027681.html
Copyright © 2011-2022 走看看