zoukankan      html  css  js  c++  java
  • 爬虫学习(四)——post请求爬取

    百度翻译爬取数据(sug 接口)

    import urllib.request
    import urllib.parse
    # Demo: send a POST request to Baidu Translate's suggestion endpoint and
    # print the raw response body.
    post_url = "https://fanyi.baidu.com/sug"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"}
    word = input("请输入要翻译的内容:")
    data = {"kw": word}

    # Form data has to be URL-encoded into a string first and then encoded
    # into bytes, because urlopen() only accepts a bytes payload.
    # Step 1: the URL-encoded string form.
    data1 = urllib.parse.urlencode(data)
    print(data1)
    # Example output when the input is 宝贝 (str):
    # kw=%E5%AE%9D%E8%B4%9D

    # Step 2: the bytes form that is actually sent. Reuse the string from
    # step 1 instead of URL-encoding the dict a second time.
    data = data1.encode("utf8")
    print(data)
    # Example output when the input is 宝贝 (bytes):
    # b'kw=%E5%AE%9D%E8%B4%9D'

    request = urllib.request.Request(post_url, headers=headers)
    # Passing `data` turns the request into a POST. The `with` block closes
    # the HTTP response (and its socket) once the body has been read.
    with urllib.request.urlopen(request, data=data) as response:
        print(response.read().decode("utf8"))

    百度翻译爬取数据(v2transapi 接口)

    import urllib.request
    import urllib.parse

    # Demo: send a POST request to Baidu Translate's v2transapi endpoint and
    # print the raw response body.
    apiurl = "https://fanyi.baidu.com/v2transapi"

    # Before scraping a site, first identify the exact API endpoint it calls
    # and the form data that has to be sent to it.
    # NOTE(review): 'sign', 'token' (and the Cookie below) look like values
    # captured from one browser session; presumably they must be refreshed
    # for other queries or later sessions - confirm against the live site.
    data = {
        'from': 'en',
        'to': 'zh',
        'query': 'baby',
        'transtype': 'realtime',
        'simple_means_flag': '3',
        'sign': '814534.560887',
        'token': '8b44713bb18ae29ba380245d18270565',
    }
    # URL-encode the form and convert it to bytes, as urlopen() requires.
    data1 = urllib.parse.urlencode(data).encode("utf8")

    # For this POST request the request headers need to be detailed; the key
    # fields must not be left out.
    headers = {
        # The headers below were deliberately left disabled.
        # NOTE(review): 'Accept-Encoding' is presumably disabled so that the
        # body arrives uncompressed and can be decoded as UTF-8 directly, and
        # 'Content-Length' presumably because a hand-written value would not
        # match a different payload - confirm.
        # 'Accept': '*/*',
        # 'Accept-Encoding': 'gzip, deflate, br',
        # 'Accept-Language': 'zh-CN,zh;q=0.9',
        # 'Connection': 'keep-alive',
        # 'Content-Length': '121',
        # 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'PSTM=1528269920; BIDUPSID=7EE884F5F31114F0BCDC2588805B747F; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; to_lang_often=%5B%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%2C%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%5D; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BAIDUID=C15EE352EEB61222BDA4C2F95822E5EF:SL=0:NR=10:FG=1; pgv_pvi=4516305920; delPer=0; H_PS_PSSID=1436_21101_28206_28131_26350_28139; PSINO=2; locale=zh; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1546425466,1546425533,1546425602,1546484054; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1546487219',
        'Host': 'fanyi.baidu.com',
        'Origin': 'https://fanyi.baidu.com',
        'Referer': 'https://fanyi.baidu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    request = urllib.request.Request(url=apiurl, headers=headers)
    # Passing `data` turns the request into a POST. The `with` block closes
    # the HTTP response (and its socket) once the body has been read.
    with urllib.request.urlopen(request, data=data1) as response:
        print(response.read().decode("utf8"))

    ajax的post请求
    爬取KFC的餐厅地址
    import urllib.request
    import urllib.parse
    # Demo: AJAX-style POST request that looks up KFC restaurant addresses
    # by keyword and prints the raw response body.
    url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"}
    # Read the search keyword (the prompt asks for a city) from the console.
    keyword = input("请输入需要查询的城市")
    # Form parameters that have to be sent along with the request.
    data = {
        "cname": "",
        "pid": "",
        "keyword": keyword,
        "pageIndex": "1",
        "pageSize": "10",
    }
    # URL-encode the parameters and convert them to bytes.
    data = urllib.parse.urlencode(data).encode("utf8")
    # Build the request object carrying the custom headers.
    request = urllib.request.Request(url, headers=headers)
    # Send the request; supplying `data` is what makes it a POST. The `with`
    # block closes the HTTP response once the body has been read.
    with urllib.request.urlopen(request, data=data) as response:
        # Read and print the response body.
        print(response.read().decode("utf8"))





     

  • 相关阅读:
    SQL通用数据类型
    SQL基础
    软件测试相关(1)
    C语言——判断
    C语言新手教程——计算
    并查集
    洛谷-P1551 亲戚
    洛谷-P1536 村村通
    洛谷-P1525 [NOIP2010 提高组] 关押罪犯
    洛谷-P2814 家谱
  • 原文地址:https://www.cnblogs.com/kuangkuangduangduang/p/10366222.html
Copyright © 2011-2022 走看看