zoukankan      html  css  js  c++  java
  • requests模块的使用

    requests模块

    • 什么是requests模块:requests是Python中一个基于网络请求的第三方模块(需要先通过 pip install requests 安装),用于模拟浏览器发起请求。

    requests-get请求

    # Basic GET request: fetch a page and save its text to a local file.
    import requests

    # Target URL.
    url = 'https://www.sogou.com/'

    # requests.get sends the GET request and returns a Response object.
    response = requests.get(url)
    if response.status_code == 200:
        # response.text is a str; pass an explicit encoding so the write does
        # not depend on the platform default codec, which may be unable to
        # encode some characters of the page.
        with open('sougo.html', 'w', encoding='utf-8') as f:
            f.write(response.text)
    else:
        print('页面获取失败')
    

    response常用属性

    # Show the commonly used attributes of a Response object.
    import requests

    # Target URL.
    url = 'https://www.sogou.com/'

    # requests.get sends the GET request and returns a Response object.
    response = requests.get(url)
    if response.status_code != 200:
        print('页面获取失败')
    else:
        # response.text (the body as text) is also available but not printed.
        for value in (
            response.status_code,  # response status code
            response.content,      # page data as bytes
            response.headers,      # response headers
            response.url,          # URL of the request
        ):
            print(value)
    

    携带参数的get请求

    • 方式1
    import requests

    # The query parameters are written directly into the URL; they do not
    # need to be encoded by hand.
    url = 'https://www.sogou.com/web?query=周杰伦&ie=utf-8'

    # requests.get sends the GET request and returns a Response object.
    response = requests.get(url)
    if response.status_code != 200:
        print('页面获取失败')
    else:
        # Store the body as raw bytes.
        with open('jay.html', 'wb') as out_file:
            out_file.write(response.content)
    
    • 方式2
    import requests

    url = 'https://www.sogou.com/web'

    # The query parameters are kept in a dict and handed to requests.get
    # through its params argument.
    params = {
        'query': '周杰伦',
        'ie': 'utf-8',
    }
    response = requests.get(url, params=params)
    if response.status_code != 200:
        print('页面获取失败')
    else:
        # Store the body as raw bytes.
        with open('jay.html', 'wb') as out_file:
            out_file.write(response.content)
    

    get请求自定义请求头信息

    # GET request that carries custom request headers.
    import requests

    url = 'https://www.sogou.com/web'
    params = {
        'query': '林宥嘉',
        'ie': 'utf-8',
    }
    # Custom request headers live in this dict and are passed to the request
    # through the headers argument.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    response = requests.get(url=url, headers=headers, params=params)
    print(response.status_code)
    

    requests-post请求

    # POST request: submit a login form and save the returned page.
    # The import makes this snippet runnable on its own, like the other
    # examples in this file.
    import requests

    # Target URL.
    url = 'https://github.com/session'
    # Form fields sent in the request body.
    # NOTE(review): real credentials should not be hard-coded in source; the
    # authenticity_token looks like a per-session value and presumably has to
    # be refreshed from the login page - verify before reuse.
    data = {
        'commit': 'Sign in',
        'utf8': '✓',
        'authenticity_token': 'IRdX8jflo9hKJAZ9mOzQBNnVnOFD7z9MfKvSYCOvrVN4uWz/LDQ81b6wWWy4d8YrvYobfiuLYS92zoK6XgH/LQ==',
        'login': '1032298871@qq.com',
        'password': '09212427zlh'
    }
    headers = {
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    response = requests.post(url=url,data=data,headers=headers)
    # Write the response text with an explicit UTF-8 encoding.
    with open('github.html','w',encoding='utf-8') as f:
        f.write(response.text)
    

    requests模块ajax的get请求

    # GET request against an ajax endpoint.
    import requests

    url = 'https://movie.douban.com/j/new_search_subjects?'
    # Query-string fields for the endpoint.
    params = {
        'sort': 'U',
        'range': '0,10',
        'tags': '电影',
        'start': '40'
    }
    headers = {
        'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    # For a GET request the fields must go through params= so that they are
    # encoded into the URL query string; data= would put them in the request
    # body and leave the query string empty.
    response = requests.get(url=url, params=params, headers=headers)
    # The ajax endpoint is expected to return its data as a JSON string.
    print(response.text)
    

    requests模块ajax的post请求

    # POST request against an ajax endpoint.
    import json
    import requests

    url = 'https://fanyi.baidu.com/sug'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    # Form field sent in the request body.
    data = {'kw': '西瓜'}
    response = requests.post(url=url, data=data, headers=headers)
    # Parse the response text as JSON and print the resulting object.
    json_data = json.loads(response.text)
    print(json_data)
    

    爬取多页数据

    # Download several result pages of one search and store each in a file.
    import os
    import requests

    # Create the output directory when it does not exist yet.
    if not os.path.exists('./page'):
        os.mkdir('page')

    url = 'https://zhihu.sogou.com/zhihu?'
    # Ask for the search term and for how many pages to fetch.
    work = input('想搜索什么内容')
    page_number = input('想获取前几页的内容')
    last_page = int(page_number)
    for page in range(1, last_page + 1):
        print(page)
        # Query parameters for this page; only 'page' changes per iteration.
        params = {
            'query': work,
            'sut': '13598',
            'lkt': '1,1546144033954,1546144033954',
            'sst0': '1546144034930',
            'page': page,
            'ie': 'utf8',
        }
        response = requests.get(url=url, params=params)
        # One file per page, named after the search term and page number.
        page_file = './page/%s%s.html' % (work, page)
        with open(page_file, 'w', encoding='utf-8') as out_file:
            out_file.write(response.text)
    

    requests模块高级:

    cookie作用:服务器端使用cookie来记录客户端的状态信息

    import requests

    # One session object is used for both requests below.
    session = requests.session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }

    # 1. Send the login request through the session so that the cookie it
    #    obtains is stored on the session object.
    login_url = 'https://accounts.douban.com/login'
    data = {
        "source": "None",
        "redir": "https://www.douban.com/people/185687620/",
        "form_email": "15027900535",
        "form_password": "bobo@15027900535",
        "login": "登录",
    }
    login_response = session.post(url=login_url, data=data, headers=headers)

    # 2. Request the personal home page with the same session (and therefore
    #    its cookie) and save the returned page.
    url = 'https://www.douban.com/people/185687620/'
    response = session.get(url=url, headers=headers)
    page_text = response.text

    with open('./douban110.html', 'w', encoding='utf-8') as out_file:
        out_file.write(page_text)
    

    requests使用ip代理

    # Fetch a Baidu search for "ip" through a proxy and save the page.
    import requests

    url = 'http://www.baidu.com/s?ie=UTF-8&wd=ip'

    # The proxy is passed as a dict: the key is the protocol and the value is
    # "ip:port".
    proxy = {
        'http':'115.28.209.249:3128'
    }
    response = requests.get(url=url,proxies=proxy)
    # response.text is a str; pass an explicit encoding so the write does not
    # depend on the platform default codec, which may be unable to encode
    # some characters of the page.
    with open('daili.html', 'w', encoding='utf-8') as f:
        f.write(response.text)
    
  • 相关阅读:
    字符串替换
    字符串查找
    字符串比较
    字节与字符串相互转换
    1365. How Many Numbers Are Smaller Than the Current Number
    1486. XOR Operation in an Array
    1431. Kids With the Greatest Number of Candies
    1470. Shuffle the Array
    1480. Running Sum of 1d Array
    【STM32H7教程】第56章 STM32H7的DMA2D应用之刷色块,位图和Alpha混合
  • 原文地址:https://www.cnblogs.com/wualin/p/10202916.html
Copyright © 2011-2022 走看看