zoukankan      html  css  js  c++  java
  • Python爬取爱奇艺【老子传奇】评论数据

    # -*- coding: utf-8 -*-
    import requests
    import os
    import csv
    import time
    import random
    
    
    # iQiyi internal comment-feed API for the series "Laozi Chuanqi".
    # The query string carries auth/device tokens and paging parameters; the
    # {snstime} and {t} placeholders are filled with fresh timestamps on each
    # request (see the .format(...) calls below).
    base_url = 'http://api-t.iqiyi.com/feed/get_feeds?authcookie=97RRnhwyZA35LUddm35Yh4x5m2Vv9DJtUOuFHAeFVinDJg17wOO5Em1Se5vGm2vqn8SoSb24&device_id=a35b964e5084125fb7dfab30205fe32b&m_device_id=969312df66b3063f0ad005ebce2181f1&agenttype=118&wallId=214740047&feedTypes=1%2C7&count=20&top=1&hasRecomFeed=1&needTotal=1&baseTvId=646605300&version=1&qypid=01010011010000000000&isVideoPage=true&tvid=646605300&categoryid=2&albumid=206289501&upOrDown=1&snsTime={snstime}&t={t}&'
    # 'xxx' are placeholders — replace with a real Cookie / User-Agent copied
    # from a logged-in browser session before running the script.
    cookies = {'Cookie':'xxx'}
    headers = {'User-Agent':'xxx'}
    
    
    # Fetch the first page of comments and write (name, description) rows to CSV.
    resp = requests.get(
        base_url.format(t=int(time.time() * 1000), snstime=int(time.time())),
        headers=headers,
        cookies=cookies,
    )
    jsondata = resp.json()
    # Guard: the API may omit "data"/"feeds" entirely — the original crashed
    # with AttributeError on None in that case.
    data = jsondata.get("data") or {}
    feeds = data.get('feeds') or []
    path = os.getcwd() + "/laozichuanqi.csv"
    # 'a+' appends across re-runs; newline='' lets the csv module control
    # line endings (otherwise blank rows appear on Windows).
    csvfile = open(path, 'a+', encoding='utf-8', newline='')
    writer = csv.writer(csvfile)
    writer.writerow(('name', 'description'))
    feedId = ''  # id of the last feed seen; used as the paging cursor below
    for feed in feeds:
        feedId = feed.get("feedId")
        # Missing fields come back as None; fall back to '' so the
        # string concatenation below cannot raise TypeError.
        name = feed.get("name") or ''
        description = feed.get("description") or ''
        print(name + "--" + description)
        writer.writerow((name, description))
    
    # print(feedId)
    # Page through the remaining comments. The API returns older feeds when
    # given the feedId of the last feed already seen, so feedId acts as a
    # forward-moving cursor. base_url already ends with '&', so appending the
    # extra parameter is safe.
    url = base_url + "feedId={feedId}"

    try:
        for _ in range(105):
            realurl = url.format(
                feedId=feedId,
                t=int(time.time() * 1000 + random.random() * 1000),
                snstime=int(time.time() + random.random() * 100),
            )
            resp = requests.get(realurl, headers=headers, cookies=cookies)
            jsondata = resp.json()
            data = jsondata.get("data") or {}
            feeds = data.get('feeds') or []
            print(feedId)
            print(len(feeds))
            print(realurl)
            # Stop when the server reports no more comments or returns an
            # empty page (the original only checked "counts" and could spin).
            if data.get("counts") == 0 or not feeds:
                break
            prev_cursor = feedId
            for feed in feeds:
                feedId = feed.get("feedId")
                print(feedId)
                # None-safe: missing fields no longer crash the concatenation.
                name = feed.get("name") or ''
                description = feed.get("description") or ''
                print(name + "--" + description)
                writer.writerow((name, description))
            # BUG FIX (the "爬取会循环" problem): if the cursor did not advance,
            # the API is serving the same page again — bail out instead of
            # re-scraping identical comments forever.
            if feedId == prev_cursor:
                break
            # Be polite to the API; sleep only between real page fetches
            # (the original slept even before discovering the run was over).
            time.sleep(15)
    finally:
        # Always release the file handle, even if a request raises.
        csvfile.close()

    以上代码有些问题:

    例如,翻页用的 feedId 游标有时不会前进,接口反复返回同一页数据,导致爬取陷入循环,不再继续往下爬。

     问题查找中。。。

    未完待续。。。

  • 相关阅读:
    日志框架之Slf4j整合Logback
    使用SLF4J和Logback
    Java日志框架SLF4J和log4j以及logback的联系和区别
    docker部署apollo
    mysql8.0设置忽略大小写后无法启动
    将项目迁移到kubernetes平台是怎样实现的
    kubectl port-forward
    linux服务器安全配置最详解
    CentOS7.3下部署Rsyslog+LogAnalyzer+MySQL中央日志服务器
    统计linux 下当前socket 的fd数量
  • 原文地址:https://www.cnblogs.com/zhzhang/p/7230711.html
Copyright © 2011-2022 走看看