zoukankan      html  css  js  c++  java
  • 电商 抓取京东的商品评价

    测试

    import requests
    import json
    import pandas as pd
    import time
    
    
    # getRtVisitor.json
    
    # Shared requests Session; loadPage() sends its GET request through it.
    session = requests.Session()  # Create a Session object
    # HTTP headers passed to every request issued by loadPage().
    # NOTE(review): these values look like they were copied from a live browser
    # session; the cookie embeds session identifiers (e.g. JSESSIONID) that are
    # hard-coded here and presumably expire — confirm/refresh before running,
    # and avoid committing real cookies.
    headers = {
    'accept':'*/*',
    'accept-encoding':'gzip, deflate, br',
    'accept-language':'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    'cookie':'shshshfpa=a3598646-29b8-1252-9170-400aa207959b-1584512334; shshshfpb=aYj%2FWfZWkAwZkeV9G%2FrkrBw%3D%3D; unpl=V2_ZzNtbUNfFkZ8D0IAeUtdDWIAElRKVEQdJgxHAXgeDwViUEBaclRCFnQUR1BnGlwUZwUZXkRcRhFFCEdkeBBVAWMDE1VGZxBFLV0CFSNGF1wjU00zQwBBQHcJFF0uSgwDYgcaDhFTQEJ2XBVQL0oMDDdRFAhyZ0AVRQhHZHsdVQBjAxFfQFNBFXQPR1d6GVoMYAIibUVncyV2Dk5UchhsBFcCIh8WC0QccQhAVTYZWAxiBxJeQFVHF3UJQVV4GFwDbgQTbUNnQA%3d%3d; __jdv=76161171|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_09db865e3c0942189269b50d26b14bc6|1590053328146; areaId=4; ipLoc-djd=4-0-0-0; __jda=122270672.1584512335310602017860.1584512335.1589161682.1590053328.3; __jdb=122270672.2.1584512335310602017860|3.1590053328; __jdc=122270672; shshshfp=b7055073ada49988c862ed9a444b4489; shshshsID=b20578247c3a807e62d02c0eab39b3f2_2_1590053336977; 3AB9D23F7A4B3C9B=Y3EFH7YHU3O2D2R6YWJLBZ2NHIUNNDY2I4BDM6SOC22F7XTTK6TZJI2OYQG2JSVJ2DCA3NVX36HZKYHQAQAGD6BXDA; __jdu=1590053342910164535585; JSESSIONID=A1044629E7D2EA753EEC8CA2677BC058.s1',
    'referer':'https://item.jd.hk/1968962771.html',
    'sec-fetch-mode':'no-cors',
    'sec-fetch-site':'cross-site',
    'user-agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'
    }
    
    
    def loadPage(page):
        """Fetch one page of product comments, save the raw body, return it parsed.

        Args:
            page: Page index to request; it is substituted into the ``page``
                query parameter and also used as the output file name
                (``"<page>.txt"`` in the current working directory).

        Returns:
            The decoded JSON payload of the response.

        Raises:
            json.JSONDecodeError: If the body (after removing any JSONP
                wrapper) is not valid JSON.
            requests.RequestException: On network failure or timeout.
        """
        # The original URL hard-coded page=3, so the argument was ignored;
        # substitute the requested page instead.
        url = (
            'https://sclub.jd.com/comment/productPageComments.action'
            '?callback=jQuery506026&productId=1968962771&score=3&sortType=5'
            '&page={page}&pageSize=10&pin=null&_=1590053396136'
        ).format(page=page)
        # A timeout keeps a stalled connection from blocking forever.
        req = session.get(url, headers=headers, timeout=30)  # issue the GET request
        text = req.text

        # Keep the untouched response on disk; an explicit encoding avoids
        # platform-default codec errors on non-ASCII comment text.
        with open(str(page) + ".txt", "w", encoding="utf-8") as f:
            f.write(text)

        # The URL asks for a JSONP callback, so the body may arrive as
        # "jQuery506026({...});". Unwrap it before handing it to json.loads.
        payload = text.strip()
        if not payload.startswith(('{', '[')):
            start = payload.find('(')
            end = payload.rfind(')')
            if start != -1 and end > start:
                payload = payload[start + 1:end]
        return json.loads(payload)
    
    
    # Scratch demo of str.strip with an argument: it trims any leading/trailing
    # '1', '2' or '3' characters (not whitespace), so the spaces that surround
    # "Runoob" remain in the printed result.
    str2 = "123   Runoob      23"
    print(str2.strip("123"))
    
    
    
    # 数据源
    #loadPage(1)
    
  • 相关阅读:
    2017年3月9日上午学习
    3.17上午
    3.16上午
    3.16下午
    3.15
    2017.3.14
    3.14
    217.3.13上午
    2017.4.7-morning
    2017.4.6-afternoon
  • 原文地址:https://www.cnblogs.com/guxingy/p/12932397.html
Copyright © 2011-2022 走看看