zoukankan      html  css  js  c++  java
  • requests模块

    一.Requests模块:

    安装:pip3 install requests

    1.Requests:非常强大。

    (1)指定url

    (2)发请求

    (3)数据解析

    (4)持久化存储

    2.Get

    3.Post

    4.Ajax get

    5.Ajax post

    6.综合练习:百度贴吧

    # Basic usage of requests and the commonly used attributes of a response.

    import requests

    target_url = 'https://www.taobao.com'

    # Issue a plain GET request; the attributes listed below are read from
    # the object it returns.
    response = requests.get(target_url)

    # Uncomment any of these to inspect the response:
    # print(response.text)         # body decoded to a str
    # print(response.content)      # raw body as bytes
    # print(response.json())       # body parsed as JSON
    # print(response.encoding)     # encoding used when decoding response.text
    # print(response.status_code)  # HTTP status code sent by the server
    # print(response.headers)      # response headers sent by the server
    first.py
    # Fetch a page and write its text to a local file.

    import requests

    page_url = 'https://www.taobao.com'

    # Download the page and take the decoded body in one step.
    html = requests.get(page_url).text

    # Store the body as UTF-8 text next to the script.
    with open('./taobao.html', mode='w', encoding='utf-8') as out_file:
        out_file.write(html)
        print('over')
    IO.py
    # GET request with query-string parameters.

    import requests

    # Original author's note: with https this "flashes" (misbehaves), so plain
    # http is used here.
    search_url = 'http://www.baidu.com/s'

    keyword = input('enter a word:')

    # requests encodes this mapping into the URL's query string.
    query = dict(ie='utf-8', wd=keyword)
    html = requests.get(search_url, params=query).text

    # The output file is named after the search word. Writing with an explicit
    # utf-8 encoding avoids garbled characters.
    with open(keyword + '.html', mode='w', encoding='utf-8') as out_file:
        out_file.write(html)
        print('over')
    get.py
    # Baidu Translate suggestion endpoint.
    # POST request with form parameters.
    import requests
    # The next two lines are intended to ignore certificate errors (SSLError).
    # NOTE(review): this private hook is the one the standard library's
    # http.client/urllib consult; requests appears to control verification
    # through its own `verify` argument instead, so this patch may have no
    # effect on the call below -- confirm before relying on it.
    import ssl
    ssl._create_default_https_context = ssl._create_unverified_context

    url = 'https://fanyi.baidu.com/sug'

    # Form fields sent in the POST body.
    data = {
        'kw': 'dog'
    }

    # Browser-like User-Agent for the request.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }

    # Fix: the header dict above was built but never sent; pass it along so
    # the request actually carries the custom User-Agent.
    response = requests.post(url=url, data=data, headers=header)
    print(response.text)  # the body is JSON; paste it into an online JSON viewer to read it
    post.py
    # Ajax GET: call the Douban chart "top_list" endpoint directly with the
    # query parameters and a browser-like User-Agent.
    import requests

    endpoint = 'https://movie.douban.com/j/chart/top_list'

    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }

    # Query-string fields, in the order they are appended to the URL.
    query = dict(
        type='24',
        interval_id='100:90',
        action='',
        start='20',
        limit='20',
    )

    response = requests.get(endpoint, params=query, headers=browser_headers)

    # Show the final URL (with the encoded query string) and then the body.
    print(response.url)
    print(response.text)
    ajax-get.py
    # Ajax POST: query the KFC store-list endpoint page by page for a keyword.
    import requests

    endpoint = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'

    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }

    # Ask for the search keyword and the inclusive page range to fetch.
    keyword = input('enter a address:')
    first_page = int(input('enter a start pagenum: '))
    last_page = int(input('enter a end pagenum:'))

    for page_no in range(first_page, last_page + 1):
        # Form body for this page; only pageIndex changes between iterations.
        form = dict(
            cname='',
            pid='',
            keyword=keyword,
            pageIndex=str(page_no),
            pageSize='10',
        )
        reply = requests.post(endpoint, data=form, headers=browser_headers)
        print(reply.text)
    ajax-post.py
    # Exercise: download a range of Baidu Tieba list pages for one forum and
    # save each page to its own HTML file.
    import requests

    forum_name = input('enter name:')
    first_page = int(input('enter start page:'))
    last_page = int(input('enter end page:'))

    list_url = 'http://tieba.baidu.com/f'

    # Kept for reference only; see the note inside the loop on why it is not sent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }

    for page_no in range(first_page, last_page + 1):
        print('开始爬取第%s页的数据' % page_no)

        # 'pn' is computed as 50 per page: 0 for page 1, 50 for page 2, ...
        offset = (page_no - 1) * 50
        query = {'kw': forum_name, 'ie': 'utf-8', 'pn': offset}

        # The original author found that sending the custom headers made the
        # page load incorrectly (cause unknown), so the request goes out
        # without them. The variant with headers was:
        # page = requests.get(url=list_url, params=query, headers=headers)
        page = requests.get(list_url, params=query)

        # Persist the page as <name>_<page>.html.
        target = ''.join([forum_name, '_', str(page_no), '.html'])
        with open(target, mode='w', encoding='utf-8') as out_file:
            out_file.write(page.text)
            print('结束爬取第%s页的数据' % page_no)
    百度贴吧练习
    # Log in to renren.com and then fetch a page through the same session.
    import requests
    #input('enter a code:')

    # A session object: requests sent through it automatically carry the
    # cookies the session has collected.
    session = requests.session()

    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }

    # Login endpoint, captured with a packet-capture tool.
    login_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=201883913543'

    # Form fields replayed from the captured login request.
    # NOTE(review): the account and password hash are hard-coded here; they
    # should not live in source code.
    login_form = {
        'email': '17701256561',
        'icode': '',
        'origURL': 'http://www.renren.com/home',
        'domain': 'renren.com',
        'key_id': '1',
        'captcha_type': 'web_login',
        'password': '7b456e6c3eb6615b2e122a2942ef3845da1f91e3de075179079a3b84952508e4',
        'rkey': '44fd96c219c593f3c9612360c80310a3',
        'f': 'https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3Dm7m_NSUp5Ri_ZrK5eNIpn_dMs48UAcvT-N_kmysWgYW%26wd%3D%26eqid%3Dba95daf5000065ce000000035b120219',
    }

    # First request through the session: the login POST. Its result is not
    # inspected; the session is simply reused below.
    login_response = session.post(login_url, data=login_form, headers=browser_headers)

    # Second request through the same session: the profile sub-page.
    profile_url = 'http://www.renren.com/289676607/profile'
    profile_response = session.get(profile_url, headers=browser_headers)

    # Save the profile page so the result can be checked in a browser.
    with open('./second.html', mode='w', encoding='utf-8') as out_file:
        out_file.write(profile_response.text)
    cookie.py
    # Send a request through a randomly chosen HTTP proxy.
    import requests
    import random

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    url = 'http://www.baidu.com/s'
    param = {
        'ie': 'utf-8',
        'wd': 'ip'
    }

    # Proxy addresses were obtained with ProxyThorn (original author's note).
    # The requests documentation states that proxy URLs must include the
    # scheme, so it is spelled out here instead of a bare host:port.
    # NOTE(review): proxy2 and proxy3 are identical in the original -- confirm
    # whether a third, distinct proxy was intended.
    proxy1 = {
        'http': 'http://112.115.57.20:3128'
    }
    proxy2 = {
        'http': 'http://121.41.171.223:3128'
    }
    proxy3 = {
        'http': 'http://121.41.171.223:3128'
    }
    proxys = [proxy1, proxy2, proxy3]

    # Pick one proxy mapping at random for this run.
    proxy = random.choice(proxys)

    response = requests.get(url=url, headers=headers, params=param, proxies=proxy)

    print(response.text)

    # The original issued one more GET with proxies={'http': ''} to "restore"
    # the local IP. The `proxies` argument only applies to the single call it
    # is passed to, so nothing needs restoring; that call was just an extra
    # network request whose result was discarded, and it has been removed.
    代理
  • 相关阅读:
    15.mysql数据库操作与Paramiko模块
    14.Gevent协程 SelectPollEpoll异步IO与事件驱动 Python连接Mysql数据库操作 RabbitMQ队列 RedisMemcached缓存 Paramiko SSH Twsited网络框架
    13 线程threading模块 join 线程锁之LockRlock信号量 将线程变为守护进程 Event事件  queue队列 生产者消费者模型 Queue队列 开发一个线程池 进程 语法 进程间通讯 进程池  多进程multiprocessing
    12.异常 isinstance 反射
    2.semantic-ui的官网文档说明
    11.Socket网络编程
    10.面向对象高级语法部分 经典类vs新式类   静态方法、类方法、属性方法 类的特殊方法 反射 异常处理 Socket开发基础
    9.对象 面向对象的特性:封装、继承、多态 类、方法、 #数据描述
    8.模块介绍 time &datetime模块 random os sys shutil json & picle shelve xml处理 yaml处理 configparser hashlib subprocess logging模块 re正则表达式
    1.sematic ui 安装
  • 原文地址:https://www.cnblogs.com/xujinjin18/p/9704761.html
Copyright © 2011-2022 走看看