zoukankan      html  css  js  c++  java
  • urllib.request 的基本用法

    urllib.request 的基本用法

    (1)基本用法

        url = "http://www.baidu.com/"
        # Fetch the page. The context manager closes the underlying
        # connection as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            # read() returns the raw body as bytes
            data = response.read()
        # bytes -> str
        str_data = data.decode("utf-8")
        # str -> bytes
        str_name = "baidu"
        bytes_name = str_name.encode("utf-8")
    

    (2)URL 参数中含有汉字时需要进行 URL 编码(转义)

        url = "http://www.baidu.com/s?wd="
        name = "python中含有汉字"
        # A URL may only contain ASCII, so the Chinese text has to be
        # percent-encoded. Quote just the query value rather than the whole
        # URL: the URL's own delimiters stay intact while anything special
        # inside the value (spaces, '&', '#', non-ASCII) is escaped.
        encode_new_url = url + urllib.parse.quote(name)
        # Send the request; the connection is closed when the block exits
        with urllib.request.urlopen(encode_new_url) as response:
            print(response)
            # Read the body; decode() defaults to UTF-8
            data = response.read().decode()
        # Save the page locally
        with open("02-encode.html", "w", encoding="utf-8") as f:
            f.write(data)
    

    (3)传入字典类型的参数

        url = "http://www.baidu.com/s?"

        params = {
            "wd": "中文",
            "key": "zhang",
            "value": "san",
        }

        # urlencode() turns the dict into "k1=v1&k2=v2" and percent-encodes
        # every key and value (UTF-8 by default), so the result is already
        # pure ASCII and needs no further quoting.
        str_params = urllib.parse.urlencode(params)
        final_url = url + str_params

        # Send the request; the connection is closed when the block exits
        with urllib.request.urlopen(final_url) as response:
            data = response.read().decode("utf-8")
        print(data)
    

    (4)添加header

    第一种添加header的方式

        url = "https://www.baidu.com"
        # Browser identification string sent as the User-Agent header
        user_agent = (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
        )
        # Extra request headers
        headers = {'User-Agent': user_agent, 'name': 'chen'}
        # Build the request object with the headers attached up front
        request = urllib.request.Request(url=url, headers=headers)
    

    第二种添加header的方式:动态添加

        url = "https://www.baidu.com"
        # Create a bare request object first
        request = urllib.request.Request(url)
        # Then attach headers to it one at a time
        request.add_header(
            "User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
        )
        # Send the request; the context manager closes the connection once
        # the body has been read.
        with urllib.request.urlopen(request) as response:
            data = response.read().decode("utf-8")
        # Full URL the request was created with
        final_url = request.get_full_url()
        print(final_url)
        with open("baidu.html", "w", encoding="utf-8") as f:
            f.write(data)

        # Headers that were set on the request object
        request_headers = request.headers
        print(request_headers)
        # Request normalizes header names with str.capitalize(), so the
        # header added as "User-Agent" is looked up as 'User-agent'.
        user_agent = request.get_header('User-agent')
        print(user_agent)
        # Response headers (still readable after the response is closed)
        print(response.headers)
    
    

    (5)使用代理

        url = 'https://www.cnblogs.com/chenshy'
        # ProxyHandler selects the proxy by the scheme of the requested URL.
        # The target here is https, so an 'https' entry is required; with
        # only an 'http' entry the request would bypass the proxy entirely.
        proxy_address = '119.102.25.91:9999'
        proxy = {
            'http': proxy_address,
            'https': proxy_address,
        }
        # Handler that routes requests through the proxy
        proxy_handler = urllib.request.ProxyHandler(proxy)
        # Build a custom opener that uses the handler
        opener = urllib.request.build_opener(proxy_handler)
        # Send the request through the proxy and close the connection after
        # reading the body
        with opener.open(url) as response:
            data = response.read().decode("utf-8")
        print(data)
    

    (6) cookie

    a.在头部添加cookie

        url = 'https://www.yaozh.com/member/'
        # The header name must be spelled 'User-Agent' (hyphen). With an
        # underscore it is sent as an unrelated header and urllib still adds
        # its default Python-urllib User-Agent.
        # NOTE(review): the Cookie value is a captured browser session; it is
        # tied to that login and presumably expires - replace with your own.
        headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
                   'Cookie':'acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; PHPSESSID=t4rb1af4vmks8gete5oqfd6ub7; _ga=GA1.2.521923122.1555078606; _gid=GA1.2.523976398.1555078606; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1555078606; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv; yaozh_logintime=1555078687; yaozh_user=729821%09lifelover; yaozh_userId=729821; _gat=1; Hm_lpvt_65968db3ac154c3089d7f9a4cbb98c94=1555078691; yaozh_uidhas=1; yaozh_mylogin=1555078693; acw_tc=707c9fd115550786016938465e492bb70702a65dacd78c0969a841171ddf8d; MEIQIA_VISIT_ID=1JlnZOkmbJhJwfRjhyv0gTMf14i; MEIQIA_EXTRA_TRACK_ID=1JlnZL47AayFGs373mZAapsuPKv'}
        request = urllib.request.Request(url,headers=headers)
        # Send the request with the cookie attached; close the connection
        # once the body has been read.
        with urllib.request.urlopen(request) as response:
            data = response.read().decode('utf-8')
        print(data)
    

    b.登录之后获取cookie ,cookiejar的使用

    import urllib.request
    from http import cookiejar
    from urllib import parse
    
    def login():
        """Log in to yaozh.com and save the member page to ``test.html``.

        The login form is POSTed through an opener that has a cookie jar
        attached; the same opener is then used to request the member page,
        so the cookies received at login are sent along automatically.
        """
        # 1. Log in from code to obtain cookies, then request the member page
        url = 'https://www.yaozh.com/login'
        # Login form fields.
        # NOTE(review): credentials are hard-coded in source; prefer reading
        # them from the environment or a prompt. 'formhash' looks like a
        # per-session token copied from the browser - confirm it is still
        # valid before relying on it.
        login_form_data = {
            'username':  'lifelover',
            'pwd': 'chen19960319',
            'formhash': 'F456373F7B',
            'backurl': 'https%3A%2F%2Fwww.yaozh.com%2F'
        }
        # HTTPCookieProcessor stores cookies from responses in the jar and
        # sends them back on later requests made through the same opener.
        cook_jar = cookiejar.CookieJar()
        cookie_handler = urllib.request.HTTPCookieProcessor(cook_jar)
        opener = urllib.request.build_opener(cookie_handler)

        # The header name must be 'User-Agent' (hyphen); with an underscore
        # the default Python-urllib User-Agent would still be sent.
        headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
        # POST data must be (1) URL-encoded and (2) bytes
        login_str = urllib.parse.urlencode(login_form_data).encode('utf-8')
        # Supplying data= turns the request into a POST
        request = urllib.request.Request(url,headers=headers,data=login_str)
        # The cookie jar is filled while the response is processed, so the
        # login response body is not needed; just close the connection.
        with opener.open(request):
            pass

        # Request the member page with the cookies collected at login
        center = 'https://www.yaozh.com/member/'
        center_request = urllib.request.Request(center,headers=headers)
        with opener.open(center_request) as response:
            data = response.read().decode('utf-8')
        with open('test.html','w',encoding='utf-8') as f:
            f.write(data)
    
  • 相关阅读:
    android开发学习4
    macOS login: Could not determine audit condition 问题解决
    java代码编程规范
    学习进度报告(二)
    学习进度报告(一)
    android开发笔记
    数组
    软件工程第二周开课博客
    第一周学习总结
    用户体验评价
  • 原文地址:https://www.cnblogs.com/chenshy/p/10704185.html
Copyright © 2011-2022 走看看