浏览器抓取真实直播源地址(纯前端JS解析)
网上搜索到的各种平台的直播源地址满天飞,但是经常会失效,因为官方会定期升级系统、修改各种参数或链接,让直播源无法长期有效。所以敝人一直崇尚的是“授人以鱼不如授人以渔”:与其把直播源直接给别人,不如教大家如何去爬取直播源,就算失效了也不怕。
0. 前言
本人业余时间喜欢用虎牙看直播,所以第一个便是想到如何抓取虎牙的直播源。
在抓取之前,需要了解视频直播源的分类和区别,可以自行了解 hls、flv、m3u8 等知识。
Tips: 本教程只是教大家如何利用前端调试技巧和爬虫基本操作,不作为商业用途,各位童鞋耗子尾汁。
1. 浏览器抓取流程
首先打开虎牙官网,随便找个直播间:https://m.huya.com/949527 ,这里使用的是手机端的网页(因为手机端的页面更简单)。
随便看了下,没有ajax
请求,那么地址定是随页面带进来了,现在大部分直播网页都是SSR
(服务器端渲染),所以只能去页面源代码找找:
好家伙!直接就找到了一个很像地址的东西 liveLineUrl,是一个 m3u8 的地址:
这个网站可以测试播放源是不是好的,来!试一下!
就很完美!
但是就这么简单的吗?
我又试了一下我经常看的【一起看】的直播间,来看看电影啥的,结果:
这是咋回事。。。然后对比下前后两个链接发现了问题,下面是【一起看】的链接:
然后想到 liveLineUrl
这个参数不是全局变量吗,控制台打印看一下,再仔细对比发现参数变了
有个fm
参数已经变成了seqid
:
先试下控制台打印的能不能播放:
行,司马懿出来了,现在只用分析如何破解参数即可。
2. 参数解析
Ctrl + Shift + F
搜索 liveLineUrl
, 然后找到这里处理url
的js
,打个断点调试一下,看看怎么处理的:
断点进入 Object(m.default)(window.liveLineUrl)
可以看到这里就是处理参数的地方,最后返回的就是解析后的参数字符串:
我整理了下解析函数,重新实现了一下:
/**
 * Re-signs a stream URL: derives a fresh `wsSecret`/`seqid` pair from the
 * `fm` and `wsTime` query parameters and returns the rebuilt URL.
 *
 * Steps:
 *  1. Split the URL into the part before the first "?" and the raw query.
 *  2. Take the stream name from the last path segment, minus a trailing
 *     `.flv` / `.m3u8` extension.
 *  3. Base64-decode `fm` (after URI-decoding it) and keep the text before the
 *     first "_" as the signing prefix.
 *  4. Hash `${prefix}_0_${streamName}_${seqid}_${wsTime}` with `md5.hex`.
 *  5. Emit `wsSecret`, `wsTime`, `u=0`, `seqid`, followed by every other
 *     original parameter with its raw (still encoded) value.
 *
 * Relies on `Base64.decode` and `md5.hex` being in scope.
 *
 * @param {string} url - Stream URL carrying at least `fm` and `wsTime`.
 * @returns {string} The URL with a regenerated signature.
 * @throws {TypeError} If `url` has no query string.
 * @throws {Error} If the `fm` or `wsTime` parameter is missing.
 */
function parseUrl(url) {
  const queryStart = url.indexOf("?");
  if (queryStart === -1) {
    throw new TypeError("parseUrl: URL has no query string");
  }
  const mainUrl = url.slice(0, queryStart);
  const query = url.slice(queryStart + 1);

  // Split each pair on the FIRST "=" only, so values that themselves contain
  // "=" (e.g. unencoded Base64 padding) are kept instead of being dropped.
  const paramsObj = {};
  for (const pair of query.split("&")) {
    const eq = pair.indexOf("=");
    if (eq !== -1) {
      paramsObj[pair.slice(0, eq)] = pair.slice(eq + 1);
    }
  }

  // The old wsSecret is pulled out only so it is excluded from `others`.
  const { fm, wsTime, wsSecret: _staleSecret, ...others } = paramsObj;
  if (fm === undefined || wsTime === undefined) {
    throw new Error("parseUrl: URL must contain both 'fm' and 'wsTime' parameters");
  }

  // Strip only a real trailing extension; the dot is escaped and anchored so
  // text such as "aflv" inside the stream name is left intact.
  const segments = mainUrl.split("/");
  const streamName = segments[segments.length - 1].replace(/\.(flv|m3u8)$/, "");

  const prefix = Base64.decode(decodeURIComponent(fm)).split("_")[0];
  const time = Math.floor(1e4 * Date.now() + 1e4 * Math.random());
  const newWsSecret = md5.hex(`${prefix}_0_${streamName}_${time}_${wsTime}`);

  const extra = Object.keys(others)
    .map((key) => `&${key}=${others[key]}`)
    .join("");

  return `${mainUrl}?wsSecret=${newWsSecret}&wsTime=${wsTime}&u=0&seqid=${time}${extra}`;
}
其中用到了Base64
和MD5
相关函数:
// md5下载:https://raw.githubusercontent.com/emn178/js-md5/master/src/md5.js
/**
 * Base64 codec for JavaScript strings; text is UTF-8 encoded before being
 * turned into Base64 and UTF-8 decoded after being read back.
 *
 * - `encode(text)`   -> Base64 of the UTF-8 bytes of `text`.
 * - `decode(base64)` -> the text whose UTF-8 bytes `base64` represents.
 * - `_utf8_encode` / `_utf8_decode` convert between a JavaScript string and a
 *   "binary string" in which every character holds one byte (0-255).
 */
const Base64 = {
  // Alphabet indexed by 6-bit value; index 64 ("=") is the padding marker.
  _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

  /**
   * @param {string} e - Text to encode.
   * @returns {string} Padded Base64 string.
   */
  encode(e) {
    const bytes = Base64._utf8_encode(e);
    let out = "";
    for (let pos = 0; pos < bytes.length; pos += 3) {
      const b0 = bytes.charCodeAt(pos);
      // charCodeAt yields NaN past the end; NaN acts as 0 in bit operations.
      const b1 = bytes.charCodeAt(pos + 1);
      const b2 = bytes.charCodeAt(pos + 2);

      const s0 = b0 >> 2;
      const s1 = ((b0 & 3) << 4) | (b1 >> 4);
      let s2 = ((b1 & 15) << 2) | (b2 >> 6);
      let s3 = b2 & 63;
      if (Number.isNaN(b1)) {
        s2 = 64;
        s3 = 64;
      } else if (Number.isNaN(b2)) {
        s3 = 64;
      }

      out +=
        Base64._keyStr.charAt(s0) +
        Base64._keyStr.charAt(s1) +
        Base64._keyStr.charAt(s2) +
        Base64._keyStr.charAt(s3);
    }
    return out;
  },

  /**
   * Characters outside the Base64 alphabet are removed before decoding, and
   * missing trailing padding is tolerated.
   *
   * @param {string} e - Base64 text.
   * @returns {string} Decoded text.
   */
  decode(e) {
    const clean = e.replace(/[^A-Za-z0-9+/=]/g, "");
    // Treat positions past the end as padding (64). Without this,
    // indexOf("") returns 0 and unpadded input gains spurious NUL characters.
    const tailSextet = (index) =>
      index < clean.length ? Base64._keyStr.indexOf(clean.charAt(index)) : 64;

    let bytes = "";
    for (let pos = 0; pos < clean.length; pos += 4) {
      const s0 = Base64._keyStr.indexOf(clean.charAt(pos));
      const s1 = Base64._keyStr.indexOf(clean.charAt(pos + 1));
      const s2 = tailSextet(pos + 2);
      const s3 = tailSextet(pos + 3);

      bytes += String.fromCharCode((s0 << 2) | (s1 >> 4));
      if (s2 !== 64) {
        bytes += String.fromCharCode(((s1 & 15) << 4) | (s2 >> 2));
      }
      if (s3 !== 64) {
        bytes += String.fromCharCode(((s2 & 3) << 6) | s3);
      }
    }
    return Base64._utf8_decode(bytes);
  },

  /**
   * @param {string} e - Text; CRLF line endings are normalised to LF first.
   * @returns {string} Binary string of UTF-8 bytes.
   */
  _utf8_encode(e) {
    const text = e.replace(/\r\n/g, "\n");
    let out = "";
    // for...of walks code points, so surrogate pairs become one 4-byte sequence.
    for (const ch of text) {
      const cp = ch.codePointAt(0);
      if (cp < 0x80) {
        out += String.fromCharCode(cp);
      } else if (cp < 0x800) {
        out += String.fromCharCode((cp >> 6) | 192, (cp & 63) | 128);
      } else if (cp < 0x10000) {
        out += String.fromCharCode(
          (cp >> 12) | 224,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      } else {
        out += String.fromCharCode(
          (cp >> 18) | 240,
          ((cp >> 12) & 63) | 128,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      }
    }
    return out;
  },

  /**
   * @param {string} e - Binary string of UTF-8 bytes.
   * @returns {string} Decoded text. Surrogates stored as separate 3-byte
   *   sequences still decode to the matching UTF-16 code units.
   */
  _utf8_decode(e) {
    let out = "";
    let pos = 0;
    while (pos < e.length) {
      const b0 = e.charCodeAt(pos);
      if (b0 < 128) {
        out += String.fromCharCode(b0);
        pos += 1;
      } else if (b0 > 191 && b0 < 224) {
        const b1 = e.charCodeAt(pos + 1);
        out += String.fromCharCode(((b0 & 31) << 6) | (b1 & 63));
        pos += 2;
      } else if (b0 >= 240 && b0 <= 244) {
        const b1 = e.charCodeAt(pos + 1);
        const b2 = e.charCodeAt(pos + 2);
        const b3 = e.charCodeAt(pos + 3);
        const cp =
          ((b0 & 7) << 18) | ((b1 & 63) << 12) | ((b2 & 63) << 6) | (b3 & 63);
        // Values above U+10FFFF are not valid code points; emit U+FFFD
        // rather than letting String.fromCodePoint throw.
        out += cp <= 0x10ffff ? String.fromCodePoint(cp) : "\uFFFD";
        pos += 4;
      } else {
        const b1 = e.charCodeAt(pos + 1);
        const b2 = e.charCodeAt(pos + 2);
        out += String.fromCharCode(
          ((b0 & 15) << 12) | ((b1 & 63) << 6) | (b2 & 63)
        );
        pos += 3;
      }
    }
    return out;
  },
};
3. 源码及播放器实现
来吧,直接上全部代码:
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta name="applicable-device" content="pc,mobile">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/video.js@7.4.1/dist/video-js.min.css">
<title>虎牙直播</title>
</head>
<body>
<!-- video.js player element; the bundled <source> is only a placeholder until the page script calls src() on the player. -->
<video id="player" class="video-js vjs-16-9 vjs-big-play-centered" controls preload="auto" data-setup="{}">
  <source src="https://bitdash-a.akamaihd.net/content/sintel/hls/playlist.m3u8" type="application/x-mpegURL">
  <p class="vjs-no-js">
    To view this video please enable JavaScript, and consider upgrading to a web browser that
    <!-- rel="noopener noreferrer" keeps the newly opened tab from reaching back via window.opener. -->
    <a href="https://videojs.com/html5-video-support/" target="_blank" rel="noopener noreferrer">supports HTML5 video</a>
  </p>
</video>
<script src="https://cdn.jsdelivr.net/npm/video.js@7.4.1/dist/video.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@videojs/http-streaming@1.10.3/dist/videojs-http-streaming.min.js"></script>
<script src="js/md5.js"></script>
<script>
/**
 * Base64 codec for JavaScript strings; text is UTF-8 encoded before being
 * turned into Base64 and UTF-8 decoded after being read back.
 *
 * - `encode(text)`   -> Base64 of the UTF-8 bytes of `text`.
 * - `decode(base64)` -> the text whose UTF-8 bytes `base64` represents.
 * - `_utf8_encode` / `_utf8_decode` convert between a JavaScript string and a
 *   "binary string" in which every character holds one byte (0-255).
 */
const Base64 = {
  // Alphabet indexed by 6-bit value; index 64 ("=") is the padding marker.
  _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",

  /**
   * @param {string} e - Text to encode.
   * @returns {string} Padded Base64 string.
   */
  encode(e) {
    const bytes = Base64._utf8_encode(e);
    let out = "";
    for (let pos = 0; pos < bytes.length; pos += 3) {
      const b0 = bytes.charCodeAt(pos);
      // charCodeAt yields NaN past the end; NaN acts as 0 in bit operations.
      const b1 = bytes.charCodeAt(pos + 1);
      const b2 = bytes.charCodeAt(pos + 2);

      const s0 = b0 >> 2;
      const s1 = ((b0 & 3) << 4) | (b1 >> 4);
      let s2 = ((b1 & 15) << 2) | (b2 >> 6);
      let s3 = b2 & 63;
      if (Number.isNaN(b1)) {
        s2 = 64;
        s3 = 64;
      } else if (Number.isNaN(b2)) {
        s3 = 64;
      }

      out +=
        Base64._keyStr.charAt(s0) +
        Base64._keyStr.charAt(s1) +
        Base64._keyStr.charAt(s2) +
        Base64._keyStr.charAt(s3);
    }
    return out;
  },

  /**
   * Characters outside the Base64 alphabet are removed before decoding, and
   * missing trailing padding is tolerated.
   *
   * @param {string} e - Base64 text.
   * @returns {string} Decoded text.
   */
  decode(e) {
    const clean = e.replace(/[^A-Za-z0-9+/=]/g, "");
    // Treat positions past the end as padding (64). Without this,
    // indexOf("") returns 0 and unpadded input gains spurious NUL characters.
    const tailSextet = (index) =>
      index < clean.length ? Base64._keyStr.indexOf(clean.charAt(index)) : 64;

    let bytes = "";
    for (let pos = 0; pos < clean.length; pos += 4) {
      const s0 = Base64._keyStr.indexOf(clean.charAt(pos));
      const s1 = Base64._keyStr.indexOf(clean.charAt(pos + 1));
      const s2 = tailSextet(pos + 2);
      const s3 = tailSextet(pos + 3);

      bytes += String.fromCharCode((s0 << 2) | (s1 >> 4));
      if (s2 !== 64) {
        bytes += String.fromCharCode(((s1 & 15) << 4) | (s2 >> 2));
      }
      if (s3 !== 64) {
        bytes += String.fromCharCode(((s2 & 3) << 6) | s3);
      }
    }
    return Base64._utf8_decode(bytes);
  },

  /**
   * @param {string} e - Text; CRLF line endings are normalised to LF first.
   * @returns {string} Binary string of UTF-8 bytes.
   */
  _utf8_encode(e) {
    const text = e.replace(/\r\n/g, "\n");
    let out = "";
    // for...of walks code points, so surrogate pairs become one 4-byte sequence.
    for (const ch of text) {
      const cp = ch.codePointAt(0);
      if (cp < 0x80) {
        out += String.fromCharCode(cp);
      } else if (cp < 0x800) {
        out += String.fromCharCode((cp >> 6) | 192, (cp & 63) | 128);
      } else if (cp < 0x10000) {
        out += String.fromCharCode(
          (cp >> 12) | 224,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      } else {
        out += String.fromCharCode(
          (cp >> 18) | 240,
          ((cp >> 12) & 63) | 128,
          ((cp >> 6) & 63) | 128,
          (cp & 63) | 128
        );
      }
    }
    return out;
  },

  /**
   * @param {string} e - Binary string of UTF-8 bytes.
   * @returns {string} Decoded text. Surrogates stored as separate 3-byte
   *   sequences still decode to the matching UTF-16 code units.
   */
  _utf8_decode(e) {
    let out = "";
    let pos = 0;
    while (pos < e.length) {
      const b0 = e.charCodeAt(pos);
      if (b0 < 128) {
        out += String.fromCharCode(b0);
        pos += 1;
      } else if (b0 > 191 && b0 < 224) {
        const b1 = e.charCodeAt(pos + 1);
        out += String.fromCharCode(((b0 & 31) << 6) | (b1 & 63));
        pos += 2;
      } else if (b0 >= 240 && b0 <= 244) {
        const b1 = e.charCodeAt(pos + 1);
        const b2 = e.charCodeAt(pos + 2);
        const b3 = e.charCodeAt(pos + 3);
        const cp =
          ((b0 & 7) << 18) | ((b1 & 63) << 12) | ((b2 & 63) << 6) | (b3 & 63);
        // Values above U+10FFFF are not valid code points; emit U+FFFD
        // rather than letting String.fromCodePoint throw.
        out += cp <= 0x10ffff ? String.fromCodePoint(cp) : "\uFFFD";
        pos += 4;
      } else {
        const b1 = e.charCodeAt(pos + 1);
        const b2 = e.charCodeAt(pos + 2);
        out += String.fromCharCode(
          ((b0 & 15) << 12) | ((b1 & 63) << 6) | (b2 & 63)
        );
        pos += 3;
      }
    }
    return out;
  },
};
/**
 * Re-signs a stream URL: derives a fresh `wsSecret`/`seqid` pair from the
 * `fm` and `wsTime` query parameters and returns the rebuilt URL.
 *
 * Steps:
 *  1. Split the URL into the part before the first "?" and the raw query.
 *  2. Take the stream name from the last path segment, minus a trailing
 *     `.flv` / `.m3u8` extension.
 *  3. Base64-decode `fm` (after URI-decoding it) and keep the text before the
 *     first "_" as the signing prefix.
 *  4. Hash `${prefix}_0_${streamName}_${seqid}_${wsTime}` with `md5.hex`.
 *  5. Emit `wsSecret`, `wsTime`, `u=0`, `seqid`, followed by every other
 *     original parameter with its raw (still encoded) value.
 *
 * Relies on `Base64.decode` and `md5.hex` being in scope.
 *
 * @param {string} url - Stream URL carrying at least `fm` and `wsTime`.
 * @returns {string} The URL with a regenerated signature.
 * @throws {TypeError} If `url` has no query string.
 * @throws {Error} If the `fm` or `wsTime` parameter is missing.
 */
function parseUrl(url) {
  const queryStart = url.indexOf("?");
  if (queryStart === -1) {
    throw new TypeError("parseUrl: URL has no query string");
  }
  const mainUrl = url.slice(0, queryStart);
  const query = url.slice(queryStart + 1);

  // Split each pair on the FIRST "=" only, so values that themselves contain
  // "=" (e.g. unencoded Base64 padding) are kept instead of being dropped.
  const paramsObj = {};
  for (const pair of query.split("&")) {
    const eq = pair.indexOf("=");
    if (eq !== -1) {
      paramsObj[pair.slice(0, eq)] = pair.slice(eq + 1);
    }
  }

  // The old wsSecret is pulled out only so it is excluded from `others`.
  const { fm, wsTime, wsSecret: _staleSecret, ...others } = paramsObj;
  if (fm === undefined || wsTime === undefined) {
    throw new Error("parseUrl: URL must contain both 'fm' and 'wsTime' parameters");
  }

  // Strip only a real trailing extension; the dot is escaped and anchored so
  // text such as "aflv" inside the stream name is left intact.
  const segments = mainUrl.split("/");
  const streamName = segments[segments.length - 1].replace(/\.(flv|m3u8)$/, "");

  const prefix = Base64.decode(decodeURIComponent(fm)).split("_")[0];
  const time = Math.floor(1e4 * Date.now() + 1e4 * Math.random());
  const newWsSecret = md5.hex(`${prefix}_0_${streamName}_${time}_${wsTime}`);

  const extra = Object.keys(others)
    .map((key) => `&${key}=${others[key]}`)
    .join("");

  return `${mainUrl}?wsSecret=${newWsSecret}&wsTime=${wsTime}&u=0&seqid=${time}${extra}`;
}
// Re-sign the sample stream URL, then hand it to the video.js player.
let e = parseUrl("//al.hls.huya.com/src/1423787831-1423787831-6115122170587774976-2847699118-10057-A-0-1-imgplus_2000.m3u8?wsSecret=f9aaf4fcbe42e724d152c265cf1837fb&wsTime=5ff71b32&fm=RFdxOEJjSjNoNkRKdDZUWV8kMF8kMV8kMl8kMw%3D%3D&ctype=tars_mobile&txyp=o%3Aj10%3B&fs=bgct&&sphdcdn=al_7-tx_3-js_3-ws_7-bd_2-hw_2&sphdDC=huya&sphd=264_*-265_*&t=103");
let t = videojs("#player");
// State the MIME type explicitly (the sample URL is an .m3u8 playlist) so the
// source handler does not depend on file-extension sniffing.
t.src({ src: e, type: "application/x-mpegURL" });
{
  // play() may hand back a promise that rejects (for example when the browser
  // blocks autoplay); report it instead of leaving an unhandled rejection.
  const playAttempt = t.play();
  if (playAttempt && typeof playAttempt.catch === "function") {
    playAttempt.catch(function (err) {
      console.warn("Playback did not start automatically:", err);
    });
  }
}
</script>
</body>
</html>
看看诸葛亮弹琴退仲达
:
4. 总结
- 目前发现虎牙【一起看】栏目下的直播间需要把 url 做第二次解析,普通直播间可以直接拿来播放;
- 有人可能会问为啥要用移动端的页面解析,而不用PC端的。其实PC端也可以,分析源码可以找到一个 config 对象,里面就包含了所需的信息,看到这些参数熟悉不?(同样,【一起看】栏目的需要二次解析):