zoukankan      html  css  js  c++  java
  • node抓取图片

    const https = require('https')
    const http = require('http') /* 方式二时使用*/
    const fs = require('fs')
    const cheerio = require('cheerio')
    const request = require('request')
    const path = require('path');
    
    const imgDir = path.join(__dirname, 'img');
    
    
    
    // Single-page URL kept from an earlier version of the script; nothing in
    // the visible code reads it.
    let url = 'https://www.3dmgame.com/gl/3749617.html';

    // Pages to scrape. `title` is the output directory (relative to the current
    // working directory) that receives the images found on `url`.
    const list1 = [
      {
        url: 'https://www.3dmgame.com/gl/3748911.html',
        title: './img/战士',
      },
      {
        url: 'https://www.3dmgame.com/gl/3749617.html',
        title: './img/猎人',
      },
      {
        url: 'https://www.3dmgame.com/gl/3749938.html',
        title: './img/机器人',
      },
    ];
    
    /**
     * Fetch an HTML page over HTTPS and download selected images from it.
     *
     * Every `<img src>` on the page is inspected. Only URLs containing the
     * news-upload prefix are considered, and URLs ending in ".jpg" are skipped.
     * Each remaining image is streamed to `<title>/<n>_<six chars of url>.png`,
     * where `n` is a per-page counter starting at 1.
     *
     * All work is asynchronous and fire-and-forget: failures are logged to the
     * console and never thrown to the caller.
     *
     * @param {string} url   HTTPS URL of the page to scrape.
     * @param {string} title Output directory; created recursively if missing.
     * @returns {void}
     */
    const getImg = (url, title) => {
      const IMG_URL_PREFIX = 'https://img.3dmgame.com/uploads/images/news';

      // Create the output directory synchronously so write streams can open it.
      fs.mkdirSync(title, { recursive: true });

      https.get(url, (res) => {
        // Sanity-check the response before reading the body.
        const { statusCode } = res;
        const contentType = res.headers['content-type'];
        console.log(statusCode, contentType);

        let err = null;
        if (statusCode !== 200) {
          err = new Error('请求状态错误');
        } else if (!/^text\/html/.test(contentType)) {
          // The slash must be escaped; unescaped it ends the regex literal early.
          err = new Error('请求类型错误');
        }

        if (err) {
          console.log(err);
          res.resume(); // Drain the response so the socket is released.
          return;
        }

        // Let the stream decode UTF-8 so multi-byte characters that straddle
        // a chunk boundary are not corrupted.
        res.setEncoding('utf8');
        let resData = '';
        res.on('data', (chunk) => {
          resData += chunk;
        });

        res.on('end', () => {
          const $ = cheerio.load(resData);
          console.error($('img').length);

          let id = 0;
          $('img').each((index, el) => {
            const imgUrl = $(el).attr('src');
            if (!imgUrl) {
              return;
            }

            // Only images hosted under the news upload directory are wanted.
            if (!imgUrl.includes(IMG_URL_PREFIX)) {
              return;
            }

            const ext = imgUrl.slice(-4);
            console.error(`ext=${ext}`);
            // JPEGs are deliberately skipped; everything else is saved as .png.
            // NOTE(review): presumably the wanted images are all PNGs - confirm.
            if (ext === '.jpg') {
              return;
            }
            console.error(imgUrl);

            // File name: running counter plus the six characters that precede
            // the extension in the source URL.
            const fileName = `${++id}_${imgUrl.slice(-10, -4)}.png`;
            const writeStream = fs.createWriteStream(path.join(title, fileName));
            const readStream = request(imgUrl);
            readStream.pipe(writeStream);

            readStream.on('end', () => {
              console.log('文件下载成功');
            });
            readStream.on('error', (downloadErr) => {
              console.log('文件下载失败', imgUrl, downloadErr);
              // pipe() does not close the destination on source errors, so
              // release the file descriptor explicitly.
              writeStream.destroy();
            });
            writeStream.on('error', (writeErr) => {
              console.log('文件写入失败', imgUrl, writeErr);
            });
            writeStream.on('finish', () => {
              console.log('文件写入成功');
            });
          });
          console.log('数据传输完毕');
        });
      }).on('error', (requestErr) => {
        console.log('请求错误', requestErr);
      });
    };
    
    // Start one scrape per configured page; each call returns immediately and
    // finishes asynchronously.
    list1.forEach(({ url: pageUrl, title: outputDir }) => {
      getImg(pageUrl, outputDir);
    });

    这里主要是抓取网页上的所有图片，然后按图片地址前缀和扩展名进行过滤，只把符合条件的图片下载到各页面对应的本地目录。

  • 相关阅读:
    ios 重构笔记
    ios uiwindow笔记
    ios静态库笔记
    ios app提交之前需要哪几个证书
    int、long、long long取值范围
    字节概述
    序列化概述
    LeetCode 最大连续子数列值
    198. LeetCode 打家劫舍
    git自定义关键字
  • 原文地址:https://www.cnblogs.com/gongzhuiau/p/15241474.html
Copyright © 2011-2022 走看看