zoukankan      html  css  js  c++  java
  • <爬虫> requests模块

    一、get请求

    import requests

    url = 'http://www.baidu.com/'

    # Browser-like User-Agent sent with every request in this script.
    headers1 = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}

    # Send a plain GET request. requests applies no timeout by default, so an
    # explicit one keeps the script from hanging on an unresponsive server.
    r = requests.get(url, headers=headers1, timeout=10)
    print(r)      # the Response object
    r.encoding = 'utf8'
    print(r.text)   # response body decoded to str using r.encoding

    # GET with parameters: pass the raw values via `params`; requests
    # URL-encodes them and appends them to the URL.
    url = 'https://www.baidu.com/s?'
    data = {'ie': 'utf8',
            'wd': '美国'}

    r = requests.get(url, headers=headers1, params=data, timeout=10)
    with open('baidu.html', 'wb') as fp:
        fp.write(r.content)

    # Frequently used Response attributes:
    #   r.text           response body as str
    #   r.content        response body as bytes
    #   r.encoding       get or set the encoding used for r.text
    #   r.status_code    HTTP status code
    #   r.headers        response headers
    #   r.url            final URL of the request

    二、post请求

    import requests

    # Bing Translator lookup endpoint (URL captured from the browser).
    url = 'https://cn.bing.com/tlookupv3?isVertical=1&&IG=15AA57077E2A43C1A35CBA989989D08D&IID=translator.5038.46/'

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/73.0.3683.86 Safari/537.36'}

    # Form fields sent in the POST body.
    formdata = {'from': 'en',
                'to': 'zh-Hans',
                'text': 'lion'}

    # Send the POST request. requests applies no timeout by default, so an
    # explicit one keeps the script from hanging on an unresponsive server.
    r = requests.post(url=url, headers=headers, data=formdata, timeout=10)
    print(r)        # the Response object
    print(r.json())   # response body parsed as JSON

    三、使用代理

    import requests

    # Search-result page that displays the caller's IP address.
    url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=2&tn=baiduhome_pg&wd=IP'

    # requests selects the proxy by the scheme of the target URL. The URL above
    # is https, so an 'https' entry is required; with only an 'http' entry the
    # request would bypass the proxy entirely.
    proxy = {'http': 'http://119.23.248.167:8080',
             'https': 'http://119.23.248.167:8080'}

    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                             ' Chrome/75.0.3770.100 Safari/537.36'}

    # Public proxies are often slow or dead; the timeout prevents an
    # indefinite hang.
    r = requests.get(url, headers=headers, proxies=proxy, timeout=10)

    with open('daili.html', 'wb') as fp:
        fp.write(r.content)

    四、cookie登录

    import requests

    # Log in to renren.com: the login URL and form data were captured from the
    # browser and are replayed here with a POST request.

    url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2019641626542'

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/73.0.3683.86 Safari/537.36'}

    formdata = {'email':'18404904721',
                'icode':'',
                'origURL':'http://www.renren.com/home',
                'domain':'renren.com',
                'key_id':1,
                'captcha_type':'web_login',
                'password':    '62a9c3375228ff329d57dc88ed0a3bc3fda0e3970e4f0ddb00562f7c8cc76316',
                'rkey':    '00b732e9c4b8d408b74655e15dd43a81',
                'f':'https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D43V5wyB58bk-wUHTFWFH94lsWfrtUQQh0HJ1zcm7kFQBvWjnNQDwwtVUR1o9aeRV%26wd%3D%26eqid%3Dcfb26d2d000cf393000000035d1db824',
                'cookie':'anonymid=jxczgs3yw3oby9; ln_uact=18404904721; ln_hurl=http://head.xiaonei.com/photos/0/0/men_main.gif; jebe_key=2819f31f-79cc-428e-b61e-8b968e2beda4%7C920b82268747e02c45f3056eeda651c7%7C1561538325515%7C1%7C1561538325729; _r01_=1; depovince=ZGQT; JSESSIONID=abcGpDO8A0q87bIF9f8Uw; ick_login=7f3c3b5e-d7e8-4100-9231-80eef18b096d; first_login_flag=1; jebecookies=c36453f0-2b07-4452-bbc5-232414d8630a|||||'}

    # Anything that depends on login state must go through one Session so the
    # cookies set by the login response are sent on later requests. The
    # context manager closes the session's pooled connections when done.
    with requests.Session() as s:
        # Send the login request; every request from here on uses `s`.
        r = s.post(url=url, headers=headers, data=formdata, timeout=10)
        print(r.text)

        # Second request: after a successful login the session carries the
        # cookies automatically.
        get_url = 'http://www.renren.com/971302264/profile'
        r2 = s.get(url=get_url, headers=headers, timeout=10)
        with open('ren.html', 'wb') as fp:
            fp.write(r2.content)

    # Author's note: the first login attempt on renren.com does not require a
    # captcha, so this code works as-is. After a failed password attempt the
    # next login does require one; the `icode` field in formdata is that
    # captcha and is generated dynamically. For logins that need no captcha,
    # this flow can be reused.
  • 相关阅读:
    (转)浅析epoll-为何多路复用I/O要使用epoll
    (转)C++对象的内存布局
    (转)C++ 虚函数表解析
    VS2008文件编码格式修改
    ubuntu与windows相关配置内容
    (转)windows宿主机,ubuntu虚拟机下的上网设置(有线网络和无线网络)
    第10章 名字控制
    php 代码重用
    php 变量
    php in_array 和 str_replace
  • 原文地址:https://www.cnblogs.com/Finance-IT-gao/p/11136961.html
Copyright © 2011-2022 走看看