zoukankan      html  css  js  c++  java
  • Java爬虫——B站弹幕爬取

    如何通过B站视频AV号找到弹幕对应的xml文件号

    首先爬取视频网页,获取该视频网页的源码。

     

     在网页源码中就可以找到该视频的av号 aid=8678034

     还有弹幕序号,cid=14295428

     弹幕存放位置为  http://comment.bilibili.com/14295428.xml

     获得该链接内容即可。

     1 package BiliBili弹幕爬取;
     2 
     3 import org.apache.http.HttpEntity; 
     4 import org.apache.http.client.methods.CloseableHttpResponse;
     5 import org.apache.http.client.methods.HttpGet;
     6 import org.apache.http.impl.client.CloseableHttpClient;
     7 import org.apache.http.impl.client.HttpClients;
     8 import org.apache.http.util.EntityUtils;
     9 
    10 import java.util.regex.Matcher;
    11 import java.util.regex.Pattern;
    12 
    13 public class getBiliBiliBofqi {
    14     public static void getBofqi(String aid) throws Exception{
    15         CloseableHttpClient closeableHttpClient = HttpClients.createDefault() ;
    16         HttpGet httpGet = new HttpGet("https://www.bilibili.com/video/av"+aid+"/") ;
    17         CloseableHttpResponse httpResponse = closeableHttpClient.execute(httpGet) ;
    18         HttpEntity httpEntity = httpResponse.getEntity() ;
    19         String en= EntityUtils.toString(httpEntity) ;
    20 //"cid=16496518&aid=9979006&pre_ad="
    21         String con = "cid=(.*)?&aid=" ;
    22         Pattern ah = Pattern.compile(con);
    23         Matcher mr = ah.matcher(en);
    24         while(mr.find()) {
    25             String id = mr.group() ;
    26             String newUrl = id.replace("cid=","") ;
    27             String x = newUrl.replace("&aid=","") ;
    28             HttpGet httpGet1 = new HttpGet("http://comment.bilibili.com/"+x+".xml");
    29             CloseableHttpResponse httpResponse1 = closeableHttpClient.execute(httpGet1) ;
    30             HttpEntity httpEntity1 = httpResponse1.getEntity() ;
    31             String en1 = EntityUtils.toString(httpEntity1) ;
    32             String c = "">(.*?)<" ;
    33             Pattern a = Pattern.compile(c);
    34             Matcher m = a.matcher(en1);
    35             while(m.find()){
    36                 String speak = m.group().replace("">","") ;
    37                 speak = speak.replace("<","") ;
    38                 System.out.println(speak);
    39             }
    40         }
    41     }
    42     public static void main(String[] args) throws Exception{
    43         getBofqi("8678034");
    44     }
    45 }

     运行结果:

  • 相关阅读:
    vue项目报错
    vue3.x版本安装element-ui、axios及echarts图表插件
    vue3.x版本安装vue-cli建项目
    vue-cli2.x版本安装vue-cli建项目
    vue项目报错:Unexpected tab character (no-tabs)
    dede不同栏目调用不同banner图的方法
    dede 友情链接显示不全解决方法
    dede上传文件乱码问题解决
    修改文本框中提示文字
    解决Hbuilder打包的apk文件按手机返回键直接退出软件
  • 原文地址:https://www.cnblogs.com/LexMoon/p/JavaBi.html
Copyright © 2011-2022 走看看