zoukankan      html  css  js  c++  java
  • nodejs express cheerio request爬虫

    const express = require('express')
    const cheerio = require('cheerio')
    const request = require("request")
    // Express application instance; the routes and the listener below are registered on it.
    const app = express()
    
    // GET /:key — use the path segment as a Baidu search keyword, fetch the
    // result page through Spider and let Spider.parse write the HTTP response.
    app.get("/:key", function (req, res) {
        const spider = new Spider()
        console.log(req.params.key)
        // Express hands over route parameters already percent-decoded, so the
        // keyword has to be re-encoded before it is placed in a query string;
        // otherwise characters such as "&", "#", "+", spaces or non-ASCII text
        // would truncate or corrupt the upstream URL.
        const searchUrl = "http://www.baidu.com/s?wd=" + encodeURIComponent(req.params.key)
        spider.fetch(searchUrl, (err, $) => {
            spider.parse(err, $, res)
        })
    })
    // POST /postData — echo the parsed request body back to the client.
    // The body parsers are passed as route-level middleware (this route is a
    // POST, not a GET): without them nothing populates req.body and the handler
    // would always see undefined.
    app.post('/postData', express.json(), express.urlencoded({extended: false}), function (req, res) {
        const result = req.body
        res.send(result);
    })
    // Bind the HTTP server to port 3000 and log a message once it is listening.
    app.listen(3000, function onListening() {
        console.log("开启服务,端口3000")
    })
    
    
    /**
     * Minimal page scraper: downloads a URL with `request` and exposes the
     * returned markup as a cheerio document.
     */
    class Spider {
        /**
         * Download `url` and hand a cheerio document to `callback`.
         *
         * @param {string} url - Address to download.
         * @param {function(?Error, Function): void} callback - Invoked once as
         *     `callback(err, $)`. `err` is null only when the request succeeded
         *     with HTTP status 200; on any failure `err` is an Error and `$`
         *     is a document with an empty body.
         */
        fetch(url, callback) {
            // encoding: null asks `request` for the raw body as a Buffer; the
            // string concatenation below converts it to text.
            request({url: url, encoding: null}, (err, response, body) => {
                if (!err && response && response.statusCode === 200) {
                    callback(null, cheerio.load('<body>' + body + '</body>'));
                    return
                }
                // A non-200 reply arrives with err === null; turn it into a real
                // error so callers cannot mistake it for a successful fetch.
                const status = response ? response.statusCode : 'unknown'
                const failure = err || new Error('Unexpected HTTP status ' + status + ' for ' + url)
                callback(failure, cheerio.load('<body></body>'));
            })
        }

        /**
         * Write the scraped result (or a failure notice) to the HTTP response.
         *
         * @param {?Error} err - Error reported by `fetch`, or null on success.
         * @param {Function} $ - cheerio document produced by `fetch`.
         * @param {Object} res - Express response object; exactly one reply is sent.
         */
        parse(err, $, res) {
            if (err) {
                // Always answer: staying silent would leave the client waiting
                // until its own timeout fires.
                console.error(err)
                res.status(502).send('Failed to fetch search results')
                return
            }
            // Inner HTML of the #content_left element of the fetched page.
            const result = $('body').find("#content_left").html()
            res.send(result)
        }

    }
    
  • 相关阅读:
    flv mime IIS设置
    正则表达式
    MATLAB out of memory
    Cyclic prefix
    Windows Live Writer Test
    zz排序算法的稳定性
    MATLAB的分数和小数
    young矩阵学习
    Python初体验(二)
    利用VS2010调用Excel的宏
  • 原文地址:https://www.cnblogs.com/c-x-a/p/11840102.html
Copyright © 2011-2022 走看看