zoukankan      html  css  js  c++  java
  • urllib

    urllib

    from urllib.request import Request, urlopen
    
    # Minimal GET request. A browser-like User-Agent is supplied so the
    # server does not treat the client as a bare script.
    url = "http://www.baidu.com"
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    
    request = Request(url, headers=header)
    response = urlopen(request)
    
    # Read the raw body and decode it (default charset: UTF-8).
    page_text = response.read().decode()
    print(page_text)
    
    

    response方法

    # HTTP status code of the response (e.g. 200).
    response.getcode()
    
    # Actual URL the data came from — useful to detect redirects.
    response.geturl()
    
    # Response headers.
    response.info()
    

    request方法

    # Read back a header previously set on the Request object.
    request.get_header('User-Agent')
    

    动态UA

    from fake_useragent import UserAgent
    
    ua = UserAgent()
    print(ua.ie)   # Random IE User-Agent string (any version).
    print(ua.firefox) # Random Firefox User-Agent string (any version).
    print(ua.chrome)  # Random Chrome User-Agent string (any version).
    print(ua.random)  # Random User-Agent from any browser vendor.
    

    post参数

    from urllib.parse import urlencode
    
    # POST form parameters. NOTE: the original used `pa = 123` inside the
    # dict literal (a syntax error) and passed the undefined name `data`
    # to Request; both are fixed below.
    f_data = {
        'pa': 123
    }
    
    # urlencode() returns "pa=123"; Request's data argument must be bytes,
    # hence the .encode().
    f_data = urlencode(f_data)
    request = Request(url, headers=header, data=f_data.encode())
    

    https忽略CA认证的证书(不是CA认证的而是个人的)

    import ssl
    # Unverified context: skips certificate validation so self-signed
    # (non-CA) HTTPS certificates are accepted. Do not use in production.
    context = ssl._create_unverified_context()
    
    response = urlopen(request,context=context)
    

    代理

    from urllib.request import Request, urlopen, build_opener, ProxyHandler
    
    url = "http://www.baidu.com"
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.36 Safari/537.36'
    }
    
    # Route the request through an authenticated HTTP proxy
    # ("user:password@host:port" form).
    proxy_handler = ProxyHandler({'http': 'zx:123@127.0.0.1:80'})
    opener = build_opener(proxy_handler)
    
    request = Request(url, headers=header)
    response = opener.open(request)
    
    info = response.read().decode()
    print(info)
    
    
    from urllib.request import Request, urlopen
    from fake_useragent import UserAgent
    from urllib.parse import urlencode
    from urllib.request import HTTPCookieProcessor,build_opener
    
    # Step 1: log in. HTTPCookieProcessor captures the session cookies
    # inside the opener, so subsequent requests reuse the session.
    login_url = "http://www.sxt.cn/index/login/login"
    headers = {
        "User-Agent": UserAgent().chrome,
    }
    payload = urlencode({
        "user": "17703181473",
        "password": "123456"
    }).encode()
    login_request = Request(login_url, headers=headers, data=payload)
    
    opener = build_opener(HTTPCookieProcessor())
    opener.open(login_request)
    
    # Step 2: fetch a page that requires the logged-in session,
    # going through the same cookie-carrying opener.
    info_url = "http://www.sxt.cn/index/user.html"
    response = opener.open(Request(info_url, headers=headers))
    print(response.read().decode())
    
    

    cookiejar

    from urllib.request import Request, build_opener, HTTPCookieProcessor
    from fake_useragent import UserAgent
    from http.cookiejar import MozillaCookieJar
    from urllib.parse import urlencode
    
    
    # 登录
    # 保存cookie到文件中
    def get_cookie():
        """Log in and persist the session cookies to cookie.txt."""
        login_url = "http://www.sxt.cn/index/login/login"
        headers = {"User-Agent": UserAgent().chrome}
        credentials = urlencode({
            "user": "17703181473",
            "password": "123456"
        }).encode()
        login_request = Request(login_url, headers=headers, data=credentials)
        
        jar = MozillaCookieJar()
        opener = build_opener(HTTPCookieProcessor(jar))
        opener.open(login_request)
        # Keep expired/discardable cookies too, so the saved file always
        # reflects the complete session state.
        jar.save("cookie.txt", ignore_expires=True, ignore_discard=True)
    
    
    def use_cookie():
        """Load cookie.txt and fetch a page that requires a login session."""
        info_url = "http://www.sxt.cn/index/user.html"
        headers = {"User-Agent": UserAgent().chrome}
        jar = MozillaCookieJar()
        # Load even expired/discardable cookies previously saved to disk.
        jar.load("cookie.txt", ignore_discard=True, ignore_expires=True)
        opener = build_opener(HTTPCookieProcessor(jar))
        response = opener.open(Request(info_url, headers=headers))
        print(response.read().decode())
    
    
    # 获取cookie从文件中
    # 访问页面
    if __name__ == '__main__':
        # get_cookie()  # Run once first to create cookie.txt.
        use_cookie()
    

    URLError

    from urllib.request import Request, urlopen
    from fake_useragent import UserAgent
    from urllib.error import URLError
    
    # Deliberately broken host/path so the request fails and the
    # URLError handling below is exercised.
    url = "http://www.sx123t.cn/index/login/login123"
    
    headers = {
        "User-Agent": UserAgent().chrome
    }
    try:
        req = Request(url, headers=headers)
        resp = urlopen(req)
        # Original had a stray trailing `s` here — a syntax error; removed.
        print(resp.read().decode())
    except URLError as e:
        # HTTPError (a URLError subclass) has empty args but carries .code;
        # a plain URLError wraps the underlying OS error in args[0].
        if e.args == ():
            print(e.code)
        else:
            print(e.args[0].errno)
    print("访问完成")
    
    
  • 相关阅读:
    安卓开发_浅谈TimePicker(时间选择器)
    eclipse显示代码行数
    Java数据解析---JSON
    Java数据解析---PULL
    Java数据解析---SAX
    统计机器学习(目录)
    FP Tree算法原理总结
    梯度下降(Gradient Descent)小结
    用scikit-learn和pandas学习线性回归
    用scikit-learn学习BIRCH聚类
  • 原文地址:https://www.cnblogs.com/zx125/p/12865278.html
Copyright © 2011-2022 走看看