zoukankan      html  css  js  c++  java
  • Node.js 简单爬虫:request 模块的简单运用

    const request = require('request');
    const iconv = require('iconv-lite');
    const jsdom = require('jsdom').JSDOM;
    const fs = require('fs')
    // Simple image crawler:
    //   1. fetch the listing page (GB2312 encoded),
    //   2. follow the first link inside every `.lml_top` element,
    //   3. on each detail page, download the first <img> of every `.article`
    //      element into ./img/.
    const BS_URL = 'https://www.menworld.org';
    const LIST_URL = `${BS_URL}/fanhao/`;
    const IMG_DIR = 'img';
    const PAGE_ENCODING = 'gb2312';

    /**
     * Resolve a possibly relative reference against a base URL.
     *
     * @param {?string} ref - Raw attribute value (href/src); may be null or empty.
     * @param {string} base - Absolute URL of the page the reference was found on.
     * @returns {?string} The absolute URL, or null when `ref` is missing/invalid.
     */
    function resolveUrl(ref, base) {
        if (!ref) {
            return null;
        }
        try {
            return new URL(ref, base).href;
        } catch (err) {
            console.error(`Skipping invalid URL "${ref}": ${err.message}`);
            return null;
        }
    }

    /**
     * Request a page, decode it from GB2312 and hand the parsed document to
     * `onDocument`. Failures (transport error or non-200 status) are logged and
     * `onDocument` is not called.
     *
     * @param {object} options - Options passed to `request`; `encoding` is forced
     *     to null so the body arrives as a raw Buffer for iconv to decode.
     * @param {function(Document): void} onDocument - Called with the parsed DOM.
     */
    function fetchDocument(options, onDocument) {
        const requestOptions = Object.assign({}, options, { encoding: null });
        request(requestOptions, (error, response, body) => {
            if (error) {
                console.error(`Request failed for ${options.url}: ${error.message}`);
                return;
            }
            if (response.statusCode !== 200) {
                console.error(`Unexpected status ${response.statusCode} for ${options.url}`);
                return;
            }
            const html = iconv.decode(body, PAGE_ENCODING); // decode gb2312
            onDocument(new jsdom(html).window.document);
        });
    }

    /**
     * Stream a remote image to a local file, logging (instead of crashing on)
     * network or file-system errors.
     *
     * @param {string} imgUrl - Absolute URL of the image.
     * @param {string} filePath - Destination path on disk.
     */
    function downloadImage(imgUrl, filePath) {
        const out = fs.createWriteStream(filePath);
        out.on('error', (err) => {
            console.error(`Cannot write ${filePath}: ${err.message}`);
        });
        request(imgUrl)
            .on('error', (err) => {
                console.error(`Download failed for ${imgUrl}: ${err.message}`);
                out.end(); // pipe() does not close the destination on source error
            })
            .pipe(out);
    }

    /**
     * Download the first image of every `.article` element on a detail page.
     *
     * @param {string} pageUrl - Absolute URL of the detail page.
     * @param {number} pageIndex - Index of the page in the listing; used in file names.
     */
    function crawlDetailPage(pageUrl, pageIndex) {
        fetchDocument({ url: pageUrl }, (pageDoc) => {
            const articles = pageDoc.getElementsByClassName('article');
            for (let j = 0; j < articles.length; j++) {
                const img = articles[j].getElementsByTagName('img')[0];
                const imgUrl = img ? resolveUrl(img.getAttribute('src'), pageUrl) : null;
                if (!imgUrl) {
                    continue;
                }
                // The first image keeps the historical `<page>.jpg` name; further
                // images on the same page get a suffix so they no longer
                // overwrite each other.
                const fileName = j === 0 ? `${pageIndex}.jpg` : `${pageIndex}_${j}.jpg`;
                downloadImage(imgUrl, `./${IMG_DIR}/${fileName}`);
            }
        });
    }

    fetchDocument({
        url: LIST_URL, // request path
        method: "GET", // request method, defaults to GET
        headers: { // request headers
            "content-type": "application/json",
        },
    }, (listDoc) => {
        // Make sure the output directory exists before any write stream is opened.
        fs.mkdir(IMG_DIR, (mkdirErr) => {
            if (mkdirErr && mkdirErr.code !== 'EEXIST') {
                console.error(`Cannot create ${IMG_DIR}: ${mkdirErr.message}`);
                return;
            }

            const entries = listDoc.getElementsByClassName('lml_top');
            for (let i = 0; i < entries.length; i++) {
                const link = entries[i].getElementsByTagName('a')[0];
                const pageUrl = link ? resolveUrl(link.getAttribute('href'), LIST_URL) : null;
                if (!pageUrl) {
                    continue;
                }
                crawlDetailPage(pageUrl, i);
            }
        });
    });
  • 相关阅读:
    求解:块级元素的宽度自适应问题
    list 小练习
    codevs1017乘积最大
    codevs1048石子归并
    luogu1387 最大正方形
    BZOJ1305: [CQOI2009]dance跳舞
    linux下分卷tar.bz文件的合并并解压缩
    ubuntu命令查补
    认识与学习BASH(中)
    认识与学习BASH
  • 原文地址:https://www.cnblogs.com/kjtt/p/11498340.html
Copyright © 2011-2022 走看看