zoukankan      html  css  js  c++  java
  • 使用python中urllib.request.Request()来构建User-Agent请求头

    1.代码案例=构建http请求头

    # coding=utf-8
    # Example 1: send a GET request that carries a custom User-Agent header.
    import urllib.request
    import urllib.parse

    url = "http://www.baidu.com/"
    # Header set attached to the request below.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }

    # Passing url/headers by keyword keeps them out of the positional `data` slot.
    request = urllib.request.Request(url=url, headers=headers)
    # The context manager closes the underlying connection once the body is read.
    with urllib.request.urlopen(request) as response:
        print(response.read().decode())

     2.代码案例=使用post请求调用百度接口

    # coding=utf-8
    # Example 2: call the Baidu Translate suggestion endpoint with a POST request.
    import urllib.request
    import urllib.parse

    word = "china"
    post_url = "https://fanyi.baidu.com/sug"
    form_data = {
        'kw': word
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }

    # urlopen/Request need the POST body as URL-encoded bytes.
    form_data = urllib.parse.urlencode(form_data).encode()
    # Request's second positional parameter is `data`, not `headers`; the
    # headers must be passed by keyword, otherwise they are never sent.
    request = urllib.request.Request(post_url, data=form_data, headers=headers)
    # Supplying `data` makes this a POST; the context manager closes the response.
    with urllib.request.urlopen(request) as response:
        print(response.read().decode())

     3.使用百度翻译接口案例

    # coding=utf-8
    # Example 3: POST to the Baidu Translate suggestion endpoint with a full
    # browser-like header set.
    import urllib.request
    import urllib.parse

    form_data = {
        'kw': 'wolf',
    }
    post_url = "https://fanyi.baidu.com/sug"
    # NOTE(review): these values (the Cookie in particular) look like they were
    # copied from a browser session and may no longer be valid - confirm before
    # relying on them.
    headers = {
        'Host': 'fanyi.baidu.com',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Origin': 'https://fanyi.baidu.com',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Referer': 'https://fanyi.baidu.com/translate?aldtype=16047&query=&keyfrom=baidu&smartresult=dict&lang=auto2zh',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,und;q=0.7',
        'Cookie': 'BAIDUID=78CF95260BCDB8D770F51009AABFDD42:FG=1; BIDUPSID=78CF95260BCDB8D770F51009AABFDD42; PSTM=1585656674; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=31356_1447_21097_31425_31341_31464_30824_26350_31164_31472_22158; delPer=0; PSINO=3; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1588584480,1588693180; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1588693180; __yjsv5_shitong=1.0_7_5362048e36179917959b4ef57e3e3a96d953_300_1588693181749_111.196.240.198_f74edd74; yjs_js_security_passport=607757242e95a02e997e4fc526bc95ce8671c676_1588693182_js',
    }

    # The POST body must be URL-encoded bytes.
    form_data = urllib.parse.urlencode(form_data).encode()
    # Pass headers by keyword: Request's second positional parameter is `data`.
    request = urllib.request.Request(post_url, data=form_data, headers=headers)
    # Open the Request object (not the bare URL) so the headers are actually sent;
    # the context manager closes the response afterwards.
    with urllib.request.urlopen(request) as response:
        print(response.read().decode())

     4.使用肯德基接口获取肯德基门店列表

    # coding=utf8
    # Example 4: query the KFC store-list endpoint by keyword with a POST request.
    import urllib.request
    import urllib.parse

    post_data = {
        'cname': '',
        'pid': '',
        'keyword': '北京',
        'pageIndex': '1',
        'pageSize': '10',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword"

    # The POST body must be URL-encoded bytes.
    post_data = urllib.parse.urlencode(post_data).encode()
    # Request's second positional parameter is `data`, not `headers`; pass both
    # by keyword so the User-Agent header is really attached to the request.
    request = urllib.request.Request(url, data=post_data, headers=headers)
    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(request) as response:
        print(response.read().decode())

     5.使用python爬取百度贴吧案例

    # coding=utf8
    # Example 5: download the first three list pages of the "python" Baidu Tieba
    # forum and save each one as an HTML file.
    import urllib.request
    import urllib.parse
    import os
    import time

    url = "https://tieba.baidu.com/f?"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    folder = "./baiduba"
    # Create the output directory if needed, without a separate existence check.
    os.makedirs(folder, exist_ok=True)

    for page in range(1, 4):
        # The `pn` query parameter advances by 50 per page in this script.
        pn = (page - 1) * 50
        data = {
            "kw": "python",
            "ie": "utf-8",
            "pn": pn,
        }
        print(f"{page}开始下载...")
        query_string = urllib.parse.urlencode(data)
        url_info = url + query_string
        # Request the URL that includes the query string; requesting the bare
        # base URL would fetch the same content on every iteration.
        request = urllib.request.Request(url=url_info, headers=headers)
        filepath = os.path.join(folder, f"python_{page}.html")
        # Write the raw bytes unchanged; both the response and the file are
        # closed when the block exits.
        with urllib.request.urlopen(request) as response, open(filepath, "wb") as fp:
            fp.write(response.read())
        print(f"{page}结束下载...")
        # Pause between requests.
        time.sleep(3)
  • 相关阅读:
    Python基础09 面向对象的进一步拓展
    Python快速教程 (手册)
    Python基础03 序列
    Python基础04 运算
    Python基础08 面向对象的基本概念
    Python基础07 函数
    Python基础10 反过头来看看
    Python基础05 缩进和选择
    Python进阶02 文本文件的输入输出
    Python进阶01 词典
  • 原文地址:https://www.cnblogs.com/zh718594493/p/12826945.html
Copyright © 2011-2022 走看看