zoukankan      html  css  js  c++  java
  • Python爬取爱奇艺【老子传奇】评论数据

    # -*- coding: utf-8 -*-
    import requests
    import os
    import csv
    import time
    import random
    
    
    # iQiyi internal comment-feed API for the series "Laozi Chuanqi".
    # The query string carries auth/device tokens and paging parameters; the
    # {snstime} and {t} placeholders are filled with fresh timestamps on each
    # request (see the .format(...) calls below).
    base_url = 'http://api-t.iqiyi.com/feed/get_feeds?authcookie=97RRnhwyZA35LUddm35Yh4x5m2Vv9DJtUOuFHAeFVinDJg17wOO5Em1Se5vGm2vqn8SoSb24&device_id=a35b964e5084125fb7dfab30205fe32b&m_device_id=969312df66b3063f0ad005ebce2181f1&agenttype=118&wallId=214740047&feedTypes=1%2C7&count=20&top=1&hasRecomFeed=1&needTotal=1&baseTvId=646605300&version=1&qypid=01010011010000000000&isVideoPage=true&tvid=646605300&categoryid=2&albumid=206289501&upOrDown=1&snsTime={snstime}&t={t}&'
    # 'xxx' are placeholders — replace with a real Cookie / User-Agent copied
    # from a logged-in browser session before running the script.
    cookies = {'Cookie':'xxx'}
    headers = {'User-Agent':'xxx'}
    
    
    # Fetch the first page of comments and write (name, description) rows to CSV.
    resp = requests.get(
        base_url.format(t=int(time.time() * 1000), snstime=int(time.time())),
        headers=headers,
        cookies=cookies,
    )
    jsondata = resp.json()
    # Guard: the API may omit "data"/"feeds" entirely — the original crashed
    # with AttributeError on None in that case.
    data = jsondata.get("data") or {}
    feeds = data.get('feeds') or []
    path = os.getcwd() + "/laozichuanqi.csv"
    # 'a+' appends across re-runs; newline='' lets the csv module control
    # line endings (otherwise blank rows appear on Windows).
    csvfile = open(path, 'a+', encoding='utf-8', newline='')
    writer = csv.writer(csvfile)
    writer.writerow(('name', 'description'))
    feedId = ''  # id of the last feed seen; used as the paging cursor below
    for feed in feeds:
        feedId = feed.get("feedId")
        # Missing fields come back as None; fall back to '' so the
        # string concatenation below cannot raise TypeError.
        name = feed.get("name") or ''
        description = feed.get("description") or ''
        print(name + "--" + description)
        writer.writerow((name, description))
    
    # print(feedId)
    # Page through the remaining comments. The API returns older feeds when
    # given the feedId of the last feed already seen, so feedId acts as a
    # forward-moving cursor. base_url already ends with '&', so appending the
    # extra parameter is safe.
    url = base_url + "feedId={feedId}"

    try:
        for _ in range(105):
            realurl = url.format(
                feedId=feedId,
                t=int(time.time() * 1000 + random.random() * 1000),
                snstime=int(time.time() + random.random() * 100),
            )
            resp = requests.get(realurl, headers=headers, cookies=cookies)
            jsondata = resp.json()
            data = jsondata.get("data") or {}
            feeds = data.get('feeds') or []
            print(feedId)
            print(len(feeds))
            print(realurl)
            # Stop when the server reports no more comments or returns an
            # empty page (the original only checked "counts" and could spin).
            if data.get("counts") == 0 or not feeds:
                break
            prev_cursor = feedId
            for feed in feeds:
                feedId = feed.get("feedId")
                print(feedId)
                # None-safe: missing fields no longer crash the concatenation.
                name = feed.get("name") or ''
                description = feed.get("description") or ''
                print(name + "--" + description)
                writer.writerow((name, description))
            # BUG FIX (the "爬取会循环" problem): if the cursor did not advance,
            # the API is serving the same page again — bail out instead of
            # re-scraping identical comments forever.
            if feedId == prev_cursor:
                break
            # Be polite to the API; sleep only between real page fetches
            # (the original slept even before discovering the run was over).
            time.sleep(15)
    finally:
        # Always release the file handle, even if a request raises.
        csvfile.close()

    以上代码有些问题:

    例如,翻页用的 feedId 游标有时不会前进,接口反复返回同一页数据,导致爬取陷入循环,不再继续往下爬。

     问题查找中。。。

    未完待续。。。

  • 相关阅读:
    日志框架之Slf4j整合Logback
    使用SLF4J和Logback
    Java日志框架SLF4J和log4j以及logback的联系和区别
    docker部署apollo
    mysql8.0设置忽略大小写后无法启动
    将项目迁移到kubernetes平台是怎样实现的
    kubectl port-forward
    linux服务器安全配置最详解
    CentOS7.3下部署Rsyslog+LogAnalyzer+MySQL中央日志服务器
    统计linux 下当前socket 的fd数量
  • 原文地址:https://www.cnblogs.com/zhzhang/p/7230711.html
Copyright © 2011-2022 走看看