  • Using urllib2 and urllib in Python 2.x

    1. Basic usage

      urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,...)

    import urllib2
    import urllib

    response = urllib2.urlopen("http://www.baidu.com")

    print 'getcode():', response.getcode()
    print 'geturl():', response.geturl()
    print 'url:', response.url
    print 'headers:\n', response.headers
    print 'msg:', response.msg

    #-------------------------------------out--------------------------------------
    getcode(): 200
    geturl(): http://www.baidu.com
    url: http://www.baidu.com
    headers:
    Date: Thu, 29 Dec 2016 06:28:36 GMT
    Content-Type: text/html; charset=utf-8
    Transfer-Encoding: chunked
    Connection: Close
    Vary: Accept-Encoding
    Set-Cookie: BAIDUID=9A1E663B4C3AB33D11266F0D865A1F59:FG=1; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
    Set-Cookie: BIDUPSID=9A1E663B4C3AB33D11266F0D865A1F59; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
    Set-Cookie: PSTM=1482992916; expires=Thu, 31-Dec-37 23:55:55 GMT; max-age=2147483647; path=/; domain=.baidu.com
    Set-Cookie: BDSVRTM=0; path=/
    Set-Cookie: BD_HOME=0; path=/
    Set-Cookie: H_PS_PSSID=21858_1464_21112_17001_21553_20930; path=/; domain=.baidu.com
    P3P: CP=" OTI DSP COR IVA OUR IND COM "
    Cache-Control: private
    Cxy_all: baidu+0ba0b09e0fa305471b5e3b42c352570f
    Expires: Thu, 29 Dec 2016 06:27:54 GMT
    X-Powered-By: HPHP
    Server: BWS/1.1
    X-UA-Compatible: IE=Edge,chrome=1
    BDPAGETYPE: 1
    BDQID: 0x889c1bcd00004be7
    BDUSERID: 0

    msg: OK

     Reading the page content

    print response.read()      # returns the whole page as a single str
    print response.readline()  # returns one line per call
    print response.readlines() # returns all lines as a list
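    The urlopen() signature quoted above also takes data and timeout arguments. A minimal sketch of both (httpbin.org is just a public echo service used here for illustration, not part of the original post): passing data switches the request to a POST, and timeout bounds the wait in seconds.

    import urllib
    import urllib2

    # Passing data makes urlopen issue a POST; timeout is in seconds.
    data = urllib.urlencode({'q': 'test'})
    response = urllib2.urlopen("http://httpbin.org/post", data=data, timeout=5)
    print response.getcode()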

    2. Building a Request and setting headers

    def set_headers():
        # Build a Request with custom headers
        # __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False)
        import urllib2
        headers = {'User-Agent':'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
        request = urllib2.Request("http://localhost:5000/urllib2testget", headers=headers)

        response = urllib2.urlopen(request)
        print request.headers
        print response.read()
        print '------------------------------------------------'
        # Append one more header
        request.add_header("addheader", "nice")
        response = urllib2.urlopen(request)
        print request.headers
        print response.read()

    set_headers()

    #-------------------------------- Output:

    {'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
    {"a": "1", "": "2"}
    ------------------------------------------------
    {'Addheader': 'nice', 'User-agent': 'liubi-Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
    {"a": "1", "": "2"}
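    A Request object also has helpers for inspecting the headers that were set; a small sketch against the same local test endpoint. Note that urllib2 normalizes header names with str.capitalize, which is why 'addheader' shows up as 'Addheader' in the output above:

    import urllib2

    request = urllib2.Request("http://localhost:5000/urllib2testget",
                              headers={'User-Agent': 'demo'})
    request.add_header("addheader", "nice")

    print request.has_header('Addheader')             # True
    print request.get_header('Addheader', 'missing')  # 'nice'
    print request.header_items()                      # list of (name, value) pairs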

    3. Sending GET and POST requests

    def get_post():
        import urllib2
        import urllib
        import json
        headers = {'User-Agent':'liu bi'}
        values = {"username":"diaosir_get","password":"diao123_get"}
        data = urllib.urlencode(values)
        # GET: append the urlencoded values to the URL as a query string
        print '---------------------get:'
        url = "http://localhost:5000/urllib2testget"
        get_url = url + "?" + data
        request = urllib2.Request(get_url, headers=headers)
        response = urllib2.urlopen(request)
        print json.loads(response.read())
        # POST: pass the urlencoded values as the request body
        print '---------------------post:'
        url = "http://localhost:5000/urllib2testpost"
        request = urllib2.Request(url, data, headers=headers)
        response = urllib2.urlopen(request)
        print json.loads(response.read())

    get_post()

    #--------------------------------------------------------- Output:
    ---------------------get:
    {u'username': u'diaosir_get', u'password': u'diao123_get'}
    ---------------------post:
    {u'username': u'diaosir_get', u'password': u'diao123_get'}
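    When query values contain spaces or non-ASCII characters, urllib's quoting helpers do the escaping that urlencode performs internally; a quick sketch:

    # -*- coding: utf-8 -*-
    import urllib

    print urllib.quote('hello world/test')  # hello%20world/test ('/' is safe by default)
    print urllib.quote_plus('hello world')  # hello+world
    print urllib.urlencode({'q': '你好'})    # q=%E4%BD%A0%E5%A5%BD (UTF-8 bytes escaped)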

    4. Setting up a proxy

    def set_proxies():
        # 1. Create a ProxyHandler
        # 2. Build an opener from it
        # 3. Install the opener globally [optional]
        # 4. Use the opener to request the URL
        import urllib2
        enable_proxy = True
        proxy_handler = urllib2.ProxyHandler({"http": 'http://120.24.73.165:3128'})
        null_proxy_handler = urllib2.ProxyHandler({})
        if enable_proxy:
            opener = urllib2.build_opener(proxy_handler)
        else:
            opener = urllib2.build_opener(null_proxy_handler)
        request = urllib2.Request('http://www.baidu.com')
        print '--------------------- without proxy'
        response = urllib2.urlopen(request)
        print response.getcode(), request.host
        print '--------------------- with proxy'
        response = opener.open(request)
        print response.getcode(), request.host

    set_proxies()
    
    #---------------------------------------------------------- Output:
    --------------------- without proxy
    200 www.baidu.com
    --------------------- with proxy
    200 120.24.73.165:3128
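    Step 3 in the list above (installing the opener) is optional: once urllib2.install_opener is called, the proxy applies to plain urllib2.urlopen calls as well, not just to opener.open. A sketch using the same placeholder proxy address:

    import urllib2

    proxy_handler = urllib2.ProxyHandler({"http": 'http://120.24.73.165:3128'})
    opener = urllib2.build_opener(proxy_handler)

    # After install_opener, module-level urlopen also goes through the proxy.
    urllib2.install_opener(opener)
    response = urllib2.urlopen('http://www.baidu.com')
    print response.getcode()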

    5. Debug mode

    def debug_set():
        # Enable wire-level debug logging on the HTTP handler
        import urllib2
        httpHandler = urllib2.HTTPHandler(debuglevel=1)
        opener = urllib2.build_opener(httpHandler)
        urllib2.install_opener(opener)
        request = urllib2.Request('http://127.0.0.1:5000/urllib2testget?a=2&b=3', headers={'User-Agent':'liubi00'})
        response = opener.open(request)
        print response.getcode(), response.read()

    debug_set()

    #------------------------------------------- Output:
    send: 'GET /urllib2testget?a=2&b=3 HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: 127.0.0.1:5000\r\nConnection: close\r\nUser-Agent: liubi00\r\n\r\n'
    reply: 'HTTP/1.0 200 OK\r\n'
    header: Content-Type: text/html; charset=utf-8
    header: Content-Length: 20
    header: Server: Werkzeug/0.11.11 Python/2.7.12
    header: Date: Fri, 30 Dec 2016 15:12:40 GMT
    200 {"a": "2", "b": "3"}
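    The same wire-level logging works for https URLs by also passing an HTTPSHandler with debuglevel=1 (a sketch; this assumes Python was built with SSL support):

    import urllib2

    http_handler = urllib2.HTTPHandler(debuglevel=1)
    https_handler = urllib2.HTTPSHandler(debuglevel=1)

    # Both handlers print the raw request/response exchange to stdout.
    opener = urllib2.build_opener(http_handler, https_handler)
    response = opener.open('https://www.baidu.com')
    print response.getcode()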

    6. Fetching cookies and saving them to cookie.txt

    import cookielib
    import urllib2

    def get_cookie():
        filename = 'cookie.txt'
        # A MozillaCookieJar instance collects cookies and can later write them to a file
        cookie = cookielib.MozillaCookieJar(filename)
        # HTTPCookieProcessor wraps the jar in a cookie-handling handler
        handler = urllib2.HTTPCookieProcessor(cookie)
        # Build an opener from the handler
        opener = urllib2.build_opener(handler)
        request = urllib2.Request('http://www.baidu.com')
        request.add_header('User-Agent', 'Mozilla/5.0')
        response = opener.open(request)
        # Save the collected cookies to the file
        cookie.save(ignore_discard=True, ignore_expires=True)
        print response.getcode()

    get_cookie()
    
    #---------------------------------------------- Output:
    200
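    The jar itself is iterable, so the captured cookies can be inspected without reading the file back; a small sketch:

    import cookielib
    import urllib2

    cookie = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    opener.open('http://www.baidu.com')

    # Each item is a Cookie object with name/value/domain/expires attributes.
    for item in cookie:
        print item.name, '=', item.value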

    7. Sending requests with saved cookies; for more detail see http://www.cnblogs.com/sysu-blackbear/p/3629770.html

    import cookielib
    import urllib2

    def use_cookie():
        # Load cookies from cookie.txt and send them with a request
        cookie_file = 'cookie.txt'
        # Create a MozillaCookieJar instance
        cookie = cookielib.MozillaCookieJar(cookie_file)
        # Read the saved cookies from the file into the jar
        cookie.load(ignore_discard=True, ignore_expires=True)
        # Build the request
        req = urllib2.Request("http://www.baidu.com")
        # Build an opener that attaches the loaded cookies
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        response = opener.open(req)
        print response.read()

    use_cookie()
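    If the cookies never need to be written to disk, a plain cookielib.CookieJar plus one shared opener behaves like a session, carrying cookies across requests automatically (a sketch; the login endpoint and form fields below are hypothetical):

    import cookielib
    import urllib
    import urllib2

    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))

    # Hypothetical login endpoint; any Set-Cookie the server returns lands in the jar...
    data = urllib.urlencode({'username': 'u', 'password': 'p'})
    opener.open('http://localhost:5000/login', data)

    # ...and is replayed automatically on later requests through the same opener.
    print opener.open('http://localhost:5000/profile').read()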

    8. Exception handling

    def deal_errors():
        import urllib2
        # HTTPError: raised when the server returns an error status code
        req = urllib2.Request('http://blog.csdn.net/cqcre')
        try:
            urllib2.urlopen(req)
        except urllib2.HTTPError, e:
            print e.code
            print e.reason

        # URLError: raised when the server cannot be reached at all
        request = urllib2.Request('http://www.xxxxx.com')
        try:
            urllib2.urlopen(request)
        except urllib2.URLError, e:
            print e.reason

        # HTTPError is a subclass of URLError, so one clause can handle both
        req = urllib2.Request('http://blog.csdn.net/cqcre')
        try:
            urllib2.urlopen(req)
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print e.code
            if hasattr(e, "reason"):
                print e.reason
        else:
            print "OK"
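    Timeouts deserve the same treatment: with urlopen's timeout argument, a connect-phase timeout usually surfaces as a URLError wrapping socket.timeout, while a read-phase timeout can raise socket.timeout directly, so it is safest to catch both (a sketch with a deliberately tiny timeout):

    import socket
    import urllib2

    try:
        urllib2.urlopen('http://www.baidu.com', timeout=0.001)
    except urllib2.URLError, e:
        # Connect-phase timeouts are wrapped in URLError
        print 'URLError:', e.reason
    except socket.timeout:
        # Read-phase timeouts can surface directly
        print 'request timed out'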