zoukankan      html  css  js  c++  java
  • python(17) 获取acfun弹幕,评论和视频信息

    每天一点linux命令:新建文件夹

    一,使用python获得acfun的所有番剧的信息,评论,弹幕

     1 #! /usr/bin/env python
     2 # -*- coding=utf-8 -*-
     3 import re
     4 import requests
     5 import sys
     6 import json
     # Python 2 only: force the interpreter-wide default string encoding to UTF-8
     # so that concatenating byte-string literals with unicode page text does not
     # raise UnicodeDecodeError.  ``reload`` is not a builtin on Python 3 and
     # ``sys.setdefaultencoding`` does not exist there, so the hack is skipped.
     if sys.version_info[0] == 2:
         reload(sys)  # noqa: F821 - Python 2 builtin
         sys.setdefaultencoding("utf-8")

     # NOTE(review): not referenced by any function visible in this listing.
     num = 1

     # Browser-like User-Agent sent with the HTTP requests below (the original
     # author's note says it guards against anti-crawler "traps").
     head = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'}
    def dm(ht):
        """Print the text of the danmaku (bullet comments) of one video.

        Requests pages 1 through 4 of the danmaku feed (500 entries per page)
        for the video id ``ht`` and prints the ``'m'`` field of every entry
        found at index 2 of the decoded JSON response.  Stops early as soon as
        a page has no usable comment list.

        Args:
            ht: Video id as a string; it is concatenated into the request URL.

        Returns:
            None.  All output goes to stdout.
        """
        base_url = 'http://danmu.aixifan.com/V2/' + ht + '?pageSize=500'
        for page_no in range(1, 5):
            # Build the page URL directly.  The previous re.sub() call never
            # matched (the pattern had lost its backslash) and passed re.S as
            # the ``count`` argument, so every iteration fetched the same URL.
            page_url = '%s&pageNo=%d' % (base_url, page_no)
            print(page_url)
            response = requests.get(page_url, headers=head)
            payload = json.loads(response.text)

            # NOTE(review): the comment list is assumed to live at index 2 of
            # the response, as in the original code - confirm against the API.
            try:
                comments = payload[2]
            except (IndexError, KeyError, TypeError):
                break
            if not comments:
                # An empty page means there is nothing further to fetch.
                break

            # Iterate the entries themselves instead of indexing 0..500, which
            # always overran the list and aborted the paging after page one.
            for comment in comments:
                try:
                    print(comment['m'])
                except (KeyError, TypeError):
                    # Skip malformed entries rather than abandoning the page.
                    continue
    def PL(ht):
        """Fetch every page of comments for one bangumi.

        Requests the comment listing for bangumi id ``ht``, prints the listing
        URL and the total page count reported under ``data.totalPage``, then
        downloads and decodes each page in turn.

        Args:
            ht: Bangumi id as a string; it is concatenated into the request URL.

        Returns:
            A list with the decoded JSON of every comment page, in page order.
            (The original returned nothing; existing callers that ignore the
            result are unaffected.)
        """
        # First request: yields the page count used to drive the loop below.
        url = 'http://www.acfun.tv/comment/bangumi/web/list?bangumiId=' + ht
        print(url)
        first_page = json.loads(requests.get(url, headers=head).content)
        total_pages = first_page['data']['totalPage']
        print(total_pages)

        pages = []
        for page_no in range(1, total_pages + 1):
            # Build the page URL directly.  The previous re.sub() call never
            # matched (pattern without its backslash, re.S passed as ``count``),
            # so page 1 was requested over and over.
            page_url = '%s&pageNo=%d' % (url, page_no)
            raw = requests.get(page_url, headers=head).content
            pages.append(json.loads(raw))
        return pages
    38 
    def geturl(max_pages=1, max_items=1):
        """Walk the bangumi index and call ``info`` for the listed shows.

        Index pages 1 through 7 are available (42 entries requested per page).
        The defaults keep the original's limit of a single show from the first
        page; pass ``None`` for either argument to lift that limit.

        Args:
            max_pages: Maximum number of index pages to fetch, or None for all
                seven.
            max_items: Maximum number of shows to process per page, or None for
                every entry on the page.

        Returns:
            None.  Results are printed by ``info``.
        """
        for page_no in range(1, 8):
            if max_pages is not None and page_no > max_pages:
                break
            # Format the page number straight into the URL.  The previous
            # re.sub() call could never match (pattern without its backslash)
            # and passed re.S as the ``count`` argument.
            page_url = ('http://www.acfun.tv/bangumi/bangumi/page'
                        '?pageSize=42&isWeb=1&pageNo=%d&sort=1' % page_no)
            print(page_url)
            listing = json.loads(requests.get(page_url, headers=head).content)

            # Iterate the entries themselves: the old range(1, 42) skipped the
            # first show and would overrun a page with fewer than 42 entries.
            entries = listing['data']['list']
            if max_items is not None:
                entries = entries[:max_items]
            for entry in entries:
                info(str(entry['id']))
    def info(ht):
        """Print the details of one bangumi and walk through its episodes.

        Prints the title, last-update text, comment count, favourite count and
        description scraped from the show page and its companion endpoints,
        then, for every episode, prints the episode URL and fetches the
        show's comments through ``PL``.

        Args:
            ht: Bangumi id as a string; it is concatenated into the URLs.

        Returns:
            None.  All output goes to stdout.
        """
        url = "http://www.acfun.tv/v/ab" + ht
        stow_url = "http://www.acfun.tv/bangumi/stow/isStowed?bangumiId=" + ht  # favourite count
        count_url = "http://www.acfun.tv/bangumi/count/bangumi_view.aspx?bangumiId=" + ht  # comment count

        # Send the shared browser-like headers on every request, as the other
        # functions in this script do.
        show_html = requests.get(url, headers=head).text
        count_text = requests.get(count_url, headers=head).text

        title = _first_group('h3 class="title">(.*?)</h3><span', show_html)
        print('名称:' + title)
        updated = _first_group('</h3><span class="last">(.*?)</span>', show_html)
        print('更新:' + updated)
        # The brackets must be escaped: unescaped, ``[(.*?)]`` is a character
        # class with no capture group, so ``.group(1)`` always raised.
        comment_total = _first_group(r'\[(.*?)\]', count_text)
        print('评论总数:' + comment_total)

        stow = json.loads(requests.get(stow_url, headers=head).content)
        print('收藏总数:' + str(stow['data']['stowCount']))

        description = _first_group('pan class="desc">(.*?)</span>', show_html)
        print('简介:' + description)

        # Number of episodes; treated as zero when the page does not expose it.
        episode_text = _first_group('" data-count="(.*?)" data-index="', show_html)
        episode_count = int(episode_text) if episode_text.isdigit() else 0

        for episode in range(1, episode_count + 1):
            # Episode pages are "<show url>_<n>".  Built directly because the
            # previous re.sub() pattern had lost its backslash and never matched.
            episode_url = '%s_%d' % (url, episode)
            print(episode_url)
            print(str(episode) + '话弹幕:')
            episode_html = requests.get(episode_url, headers=head).text
            # Id that the danmaku endpoint expects.
            # NOTE(review): the original had the dm(...) call for this id
            # commented out, so no danmaku is printed under the header above.
            vid = _first_group('data-vid="(.*?)" data-sid', episode_html)
            print(str(episode) + '话评论:')
            # NOTE(review): comments are keyed by the show id, so this repeats
            # the same fetch for every episode - kept as in the original.
            PL(ht)


    def _first_group(pattern, text, default=''):
        """Return group 1 of the first match of ``pattern`` in ``text``, else ``default``."""
        match = re.search(pattern, text, re.S)
        return match.group(1) if match else default
    # Script entry point: start the crawl only when this file is executed
    # directly, not when it is imported as a module.
    if __name__ == "__main__":
        geturl()



  • 相关阅读:
    神医,全部的诡异动画,
    显示界面的,调节frame的代码 写到 viewwillappear,
    两个像素,
    人类的心理行为模式,---》阮一峰,
    浅谈IE11--web开发测试
    node中的console
    node服务器重定向
    服务端渲染&&客户端渲染
    node积累
    Apache网页文件目录模板
  • 原文地址:https://www.cnblogs.com/lovychen/p/5152281.html
Copyright © 2011-2022 走看看