查询历史弹幕接口 https://api.bilibili.com/x/v2/dm/web/history/seg.so 时,返回的是 ProtoBuf 格式的二进制数据,需要先用 protoc 生成解析代码,再用 google.protobuf 进行解析。
流程
首次使用需要先下载安装 protobuf:https://github.com/protocolbuffers/protobuf/releases 。笔者在 Windows 下,下载安装了 win64 版本的 protoc,并下载 python 版 protobuf 后自行编译安装。中间有个小坑:Win10 自带的 python 在最后的 install 步骤会出现权限问题,加上 --user 参数即可。
先写一个 dm.proto
// Schema used to decode Bilibili history-danmaku responses (proto3 syntax).
syntax = "proto3";
// Every message below is declared in the `dm` package.
package dm;
// Top-level reply message: the Python code parses the whole response body
// as one DmSegMobileReply.
message DmSegMobileReply{
// Field 1: zero or more danmaku entries.
repeated DanmakuElem elems=1;
}
// A single danmaku entry inside a DmSegMobileReply.
// NOTE(review): the field meanings noted below are inferred from the field
// names only; confirm them against the Bilibili API documentation.
message DanmakuElem{
// Numeric identifier of the entry.
int64 id = 1;
// Presumably the position in the video at which the entry appears (unit not shown here; TODO confirm).
int32 progress = 2;
// Presumably the display mode of the entry (TODO confirm the value table).
int32 mode = 3;
int32 fontsize = 4;
// Presumably a colour packed into one unsigned integer (TODO confirm the encoding).
uint32 color = 5;
// Presumably a hash identifying the sender (TODO confirm).
string midHash = 6;
// Text of the entry.
string content = 7;
// Presumably the creation/send time as a timestamp (TODO confirm the unit).
int64 ctime = 8;
int32 weight = 9;
string action = 10;
int32 pool = 11;
// Presumably the same identifier as `id`, carried as a string (TODO confirm).
string idStr = 12;
}
利用 protoc 编译为 python 解析文件 protoc --python_out=./ ./dm.proto
最后我们引入编译生成的 dm_pb2 模块,用它来解析请求返回的 content 中的二进制数据,并将其转换为 JSON 形式。
Code
from google.protobuf.json_format import MessageToJson, Parse
import datetime
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import csv
import dm_pb2
import myhtml
def createDatalist(datestart, dateend=None):
    """Return every calendar day from *datestart* to *dateend*, inclusive.

    Args:
        datestart: First day of the range, formatted as ``'YYYY-MM-DD'``.
        dateend: Last day of the range, formatted as ``'YYYY-MM-DD'``.
            Defaults to today's local date when omitted.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings in ascending order. The list is
        empty when *datestart* is later than *dateend*.

    Raises:
        ValueError: If either argument does not match ``'%Y-%m-%d'``.
    """
    date_format = '%Y-%m-%d'
    first_day = datetime.datetime.strptime(datestart, date_format).date()
    if dateend is None:
        # Take today's date directly instead of formatting "now" to text and
        # parsing it back again.
        last_day = datetime.date.today()
    else:
        last_day = datetime.datetime.strptime(dateend, date_format).date()

    # A negative span (start after end) produces an empty range; the old loop
    # unconditionally emitted the start date even for an inverted range.
    span_days = (last_day - first_day).days
    return [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in range(span_days + 1)
    ]
# HTTP headers sent with every danmaku-history request.
# The published version of this script redacted both values with a bare `?`,
# which is not valid Python. They are explicit placeholders now: replace them
# with the User-Agent and Cookie of your own browser session before running.
# NOTE(review): the history endpoint presumably requires a logged-in cookie;
# confirm against the API documentation.
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Cookie': 'PASTE_YOUR_BILIBILI_COOKIE_HERE',
}
def getDanmuHistory(url):
    """Fetch one history-danmaku response and decode it into plain Python data.

    The response body is parsed as a ``dm_pb2.DmSegMobileReply`` protobuf
    message, rendered to JSON with ``MessageToJson`` and loaded back with
    ``json.loads``.

    Args:
        url: Full request URL; it is sent with the module-level ``headers``.

    Returns:
        The decoded reply as a ``dict`` on success (its danmaku entries, if
        any, are under the ``"elems"`` key, following the schema's field
        name). An empty list ``[]`` is returned when the request or the
        decoding fails, so one bad day does not abort a whole crawl.
    """
    try:
        # A timeout keeps a stalled connection from hanging the crawl forever.
        response = requests.get(url, headers=headers, timeout=10)
        # Do not try to decode an HTTP error page as protobuf.
        response.raise_for_status()
        reply = dm_pb2.DmSegMobileReply()
        reply.ParseFromString(response.content)
        return json.loads(MessageToJson(reply))
    except Exception as exc:
        # The original clause was `except all:`. `all` is a builtin function,
        # not an exception class, so any failure raised TypeError instead of
        # reaching the fallback. Catching Exception restores the intended
        # best-effort behaviour; the failure is reported rather than hidden.
        print('getDanmuHistory failed for', url, '-', repr(exc))
        return []
def getDanmuHistoryRange(cid, start):
    """Collect the history danmaku of one video for every day since *start*.

    One request is issued per day returned by ``createDatalist(start)``, and
    each day is printed once it has been processed.

    Args:
        cid: The ``oid`` query value identifying the video part (str or int).
        start: First day to query, formatted as ``'YYYY-MM-DD'``.

    Returns:
        A flat list with the danmaku entries (dicts) of all queried days.
    """
    ans = []
    for day in createDatalist(start):
        url = (
            'https://api.bilibili.com/x/v2/dm/web/history/seg.so'
            '?type=1&oid=' + str(cid) + '&date=' + day
        )
        reply = getDanmuHistory(url)
        if isinstance(reply, dict):
            # A successful reply is a dict. `ans += reply` would append the
            # dict's KEYS (the string "elems"), not the danmaku themselves,
            # so take the entries out of the "elems" field explicitly.
            ans.extend(reply.get('elems', []))
        else:
            # The failure path of getDanmuHistory returns a plain list.
            ans.extend(reply)
        print(day)
    return ans
def getCidByBid(queryBid):
    """Look up the cid of the first page of a video by its BV id.

    Args:
        queryBid: The video's BV id, e.g. ``"BV16K4y1h7eq"``.

    Returns:
        The ``cid`` of the first entry in the response's ``data`` list, as a
        string.
    """
    # The expression is parenthesized so it can span two lines; the original
    # ended a line with a dangling `+`, which is a syntax error.
    urlGetCid = (
        "https://api.bilibili.com/x/player/pagelist?bvid="
        + queryBid + "&jsonp=jsonp"
    )
    # NOTE(review): myhtml is a project-local helper; it presumably returns
    # the response body as UTF-8 text. Confirm against its definition.
    strCidJson = myhtml.getRequestsContentUtf8(urlGetCid)
    jsonCid = json.loads(strCidJson)
    pages = jsonCid["data"]
    print(pages)
    return str(pages[0]["cid"])
if __name__ == '__main__':
    # Script entry point: resolve the video's cid, crawl its history danmaku
    # starting from the given day, and dump the result to output.json.
    video_bvid = "BV16K4y1h7eq"
    video_cid = getCidByBid(video_bvid)
    danmaku_entries = getDanmuHistoryRange(video_cid, '2021-05-01')
    with open("output.json", "w") as out_file:
        json.dump(danmaku_entries, out_file)