前提
与浏览器DOM具有一致接口的JsDOM https://github.com/jsdom/jsdom
与jQuery有类似接口的CheerIO https://github.com/cheeriojs/cheerio
都可以做DOM操作
JSDOM
文档:https://airbnb.io/enzyme/docs/guides/jsdom.html
一段抓取网页数据的代码:
const fs = require('fs');
const { JSDOM } = require('jsdom');
const jquery = require('jquery');
const { log, table, } = console;
/**
 * Turns scraped HTML into plain text, rewriting images as `[img]url[/img]`.
 *
 * @param {string} html - Raw markup (title and post body joined by a newline).
 * @returns {string} The markup with the handled tags stripped or rewritten.
 */
function cleanPostHtml(html) {
  return html
    // Drop line-break tags and non-breaking-space entities.
    // NOTE(review): the second alternative was lost in extraction; `&nbsp;` is
    // assumed because a plain space would also strip the space that the
    // `<img src="..." ...>` pattern below depends on — confirm.
    .replace(/(?:<br\s*>|&nbsp;)/g, '')
    // Rewrite image tags to BBCode-style markers, keeping only the src URL.
    .replace(/<img src="([^"]*)" .*>/g, '[img]$1[/img]')
    // Replace whole <font>…</font> runs (and trailing newlines) with one newline.
    .replace(/<font.*>.*<\/font>\n*/g, '\n')
    // Remove everything from an opening <a …> to the last `>` on that line,
    // plus any newlines that follow it.
    .replace(/<a.*>\n*/g, '')
    // Remove closing </a> tags that sit directly before a newline.
    .replace(/(?:<\/a>\n|\n)/g, '\n');
}

/**
 * Loads `url` with jsdom, extracts the title (`a#thread_subject`) and the
 * first `.t_fsz` post body (`td.t_f`), and passes the cleaned text to
 * `callback`. When the post contains a `p.attnm > a` link, its href is
 * appended on a final "下载地址:" line.
 *
 * Any failure (fetch error, missing elements, an exception thrown by
 * `callback`) is logged with `console.log`; `callback` is not invoked when
 * the page cannot be loaded or parsed.
 *
 * @param {string} url - Address of the page to scrape.
 * @param {function(string): void} callback - Receives the extracted text.
 * @returns {Promise<void>} Settles once the work (or its error logging) is done.
 */
function get(url, callback) {
  // Starting from a resolved promise routes synchronous throws from
  // `JSDOM.fromURL` into the same `.catch` as asynchronous rejections.
  return Promise.resolve()
    .then(() => JSDOM.fromURL(url))
    .then((dom) => {
      const $ = jquery(dom.window);
      const title = $('a#thread_subject')[0];
      const post = $('div#postlist > div > table .t_fsz')[0];
      const body = post ? post.querySelector('td.t_f') : null;
      if (!title || !body) {
        throw new Error(`Title or post body not found at ${url}`);
      }

      let result = cleanPostHtml(`${title.innerHTML}\n${body.innerHTML}`);

      // The attachment link is optional; skip it explicitly instead of
      // relying on a swallowed TypeError.
      const attachment = post.querySelector('p.attnm > a');
      if (attachment) {
        result += `\n下载地址:${attachment.href}`;
      }
      callback(result);
    })
    .catch((err) => {
      console.log(err);
    });
}