zoukankan      html  css  js  c++  java
  • node.js爬取ajax接口数据

    爬取页面数据与爬取接口数据,我还是觉得爬取接口数据更加简单一点,主要爬取一些分页的数据。

    爬取步骤:

    1.明确目标接口地址,举个例子 : https://www.vcg.com/api/common/searchImage?phrase=%E6%98%A5%E5%A4%A9&graphicalStyle%5B0%5D=1&page=1 网上随便找到  视觉中国的一个网址

    这个网址上的图片非常好看 

    2.接口返回的数据都是json数据。很统一,处理起来也很便捷。撇开奇葩接口不说。

    3.只需要伪造好请求头就可访问,这是重点。

    接口参数是     

    原始数据

    下面是我爬取代码

    //引入自己封装的链接数据模块
    let mysql = require('./connectdatabase.js');
    //SuperAgent是一个轻量级、灵活的、易读的、低学习曲线的客户端请求代理模块,使用在NodeJS环境中。
    let superagent = require('superagent');
    var fs = require('fs');
    
    // .set( "Accept", "application/json")
    //     .set( "Cookie", "acw_tc=276aedea15548801849954091e3094245e35937d42d912140fa37630751eeb; clientIp=122.235.188.119; source=baidusem; sajssdk_2015_cross_new_user=1; _ga=GA1.2.109353021.1554880168; _gid=GA1.2.1994909913.1554880168; Hm_lvt_0af14db9b5993b4879812c54f6cf147d=1554880168; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216a06147ef76c7-04ba8d1655b9ab-191f7059-2073600-16a06147ef8279%22%2C%22%24device_id%22%3A%2216a06147ef76c7-04ba8d1655b9ab-191f7059-2073600-16a06147ef8279%22%2C%22props%22%3A%7B%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_utm_source%22%3A%22baidusem%22%2C%22%24latest_utm_medium%22%3A%22cpc%22%2C%22%24latest_utm_campaign%22%3A%22%E7%AB%99%E5%86%85%E6%90%9C%E7%B4%A2%E8%AF%8D-%E6%A0%B8%E5%BF%83%22%2C%22%24latest_utm_content%22%3A%22%E7%AB%99%E5%86%85%E6%90%9C%E7%B4%A2%E8%AF%8D-%E5%9B%BE%E7%89%87%22%2C%22%24latest_utm_term%22%3A%22%E5%9B%BE%E7%89%87%22%7D%7D; Hm_lpvt_0af14db9b5993b4879812c54f6cf147d=1554880467")
    //     .set( "X-Forwarded-For","122.235.188.119")
    //     .set( "Host", "www.vcg.com")
    //     .set( "Accept-Encoding","gzip, deflate, br")
    //     .set( "Referer", url)
    function get(url) {
      superagent.get(url)
        .set(
          {
            "Accept": "application/json",
            "Accept-Encoding": "gzip, deflate, br",
            "Host": "www.vcg.com",
            "Referer": 'https://www.vcg.com/api/common/search?phrase=%E6%98%A5%E5%A4%A9&graphicalStyle%5B0%5D=1&page=1',
            "X-Forwarded-For": "122.235.188.119",
            "Cookie": "acw_tc=276aedea15548801849954091e3094245e35937d42d912140fa37630751eeb; clientIp=122.235.188.119; source=baidusem; sajssdk_2015_cross_new_user=1; _ga=GA1.2.109353021.1554880168; _gid=GA1.2.1994909913.1554880168; Hm_lvt_0af14db9b5993b4879812c54f6cf147d=1554880168; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216a06147ef76c7-04ba8d1655b9ab-191f7059-2073600-16a06147ef8279%22%2C%22%24device_id%22%3A%2216a06147ef76c7-04ba8d1655b9ab-191f7059-2073600-16a06147ef8279%22%2C%22props%22%3A%7B%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_utm_source%22%3A%22baidusem%22%2C%22%24latest_utm_medium%22%3A%22cpc%22%2C%22%24latest_utm_campaign%22%3A%22%E7%AB%99%E5%86%85%E6%90%9C%E7%B4%A2%E8%AF%8D-%E6%A0%B8%E5%BF%83%22%2C%22%24latest_utm_content%22%3A%22%E7%AB%99%E5%86%85%E6%90%9C%E7%B4%A2%E8%AF%8D-%E5%9B%BE%E7%89%87%22%2C%22%24latest_utm_term%22%3A%22%E5%9B%BE%E7%89%87%22%7D%7D; Hm_lpvt_0af14db9b5993b4879812c54f6cf147d=1554880467",
          }
        )
        .end(function (err, res) {
          // 抛错拦截
          if (err) {
            throw Error(err);
            return
          }
          // console.log(res);
          fs.appendFile("./接口数据.txt", JSON.stringify(res.text), 'utf-8', function (err) {
            if (err) {
              console.log(err);
            }
          });
        });
    }
    
    get('https://www.vcg.com/api/common/searchImage?phrase=%E6%98%A5%E5%A4%A9&graphicalStyle%5B0%5D=1&page=1');
    View Code

    这样就爬取到了第一页的数据   并且在本地创建了一个接口数据的txt文件 。

    这样一个简单的接口数据就已经爬下来了,对于数据的处理就不多废话,各取所需。

    本人qq : 981900309  加备注博客园

  • 相关阅读:
    浮点数大于0
    坐标变换
    实战c++中的vector系列--正确释放vector的内存(clear(), swap(), shrink_to_fit())
    计算sigma
    ECharts 在winform中使用(访问JS)
    Linux用户锁定、解锁及锁定查看
    vue或者js中平均分割数组
    python 文件读写with open模式r,r+,w,w+,a,a+的区别
    vue 三元表达式当出现elif
    后端排序时去掉element表格排序的null状态
  • 原文地址:https://www.cnblogs.com/caihua0405/p/10683949.html
Copyright © 2011-2022 走看看