zoukankan      html  css  js  c++  java
  • node爬取页面元素

    
    
    /**
    * Created by on 2018/12/25.
    */
    const http = require("https");
    const fs = require('fs');

    //读取html插件
    const cheerio = require("cheerio");

    //读取的文件路径
    const filePath = './山炮表格.xlsx';

    //表格插件
    const xlsx = require('node-xlsx');


    // Parse the workbook once; the result holds one entry per sheet.
    const sheets = xlsx.parse(filePath);

    // Every row of every sheet, appended in sheet order. Each element is one
    // row as stored under the sheet's `data` property.
    let allUserId = [];
    sheets.forEach(function (sheet) {
      // `for...of` walks the row values directly; `for...in` enumerates string
      // keys and is not meant for arrays. A sheet without `data` contributes
      // no rows.
      for (const row of sheet.data || []) {
        allUserId.push(row);
      }
    });

    // Weibo user-search page; the id to look up is sent as the `q` parameter.
    const findUserUrl = 'https://s.weibo.com/user';
    // Ids whose search result page contained at least one `.icon-vip` element.
    let hasVUser = [];
    // Row to start from. Starting at 1 means row 0 is never looked up
    // (presumably a header row - confirm against the spreadsheet).
    let index = 1;

    /**
     * Returns the id stored in column 1 of a spreadsheet row.
     *
     * @param {*} row - One row of the sheet (expected to be an array of cells).
     * @returns {*} The cell value, or `undefined` when the row is not an array
     *   or the cell is missing / blank.
     */
    function readUserId(row) {
      if (!Array.isArray(row)) {
        return undefined;
      }
      const id = row[1];
      if (id === undefined || id === null || String(id).trim() === '') {
        return undefined;
      }
      return id;
    }

    /**
     * Checks the ids in `allIds` one request at a time, starting at row `index`.
     * For each id the search page is fetched and, when the returned HTML holds
     * a `.icon-vip` element, the id is appended to `hasVUser`. Once no rows are
     * left, `hasVUser` is printed.
     *
     * Rows without a usable id are skipped, and a failed or non-200 request is
     * logged and skipped so that one bad lookup does not end the whole run.
     *
     * @param {Array<Array>} allIds - Spreadsheet rows; the id is read from column 1.
     * @param {number} index - Row at which to start (or continue) the scan.
     * @returns {void}
     */
    function getV(allIds, index) {
      const rows = Array.isArray(allIds) ? allIds : [];

      // Move past rows that carry no id instead of searching for "undefined".
      let current = index;
      while (current < rows.length && readUserId(rows[current]) === undefined) {
        current += 1;
      }

      // Nothing left (also covers an empty list or an out-of-range start index).
      if (!(current < rows.length)) {
        console.log(hasVUser,'所有带v');
        return;
      }

      const theCurrentId = readUserId(rows[current]);
      // Escape the id so characters such as `&`, `#` or spaces stay inside `q`.
      const theUrl = `${findUserUrl}?q=${encodeURIComponent(theCurrentId)}&Refer=SUer_box`;
      console.log(theUrl);

      // Several events (end / error) may fire for one request; continue only once.
      let moved = false;
      const next = () => {
        if (moved) {
          return;
        }
        moved = true;
        getV(rows, current + 1);
      };

      http.get(theUrl, (res) => {
        if (res.statusCode !== 200) {
          console.error(`Skipping ${theCurrentId}: HTTP ${res.statusCode}`);
          // Drain the body so the socket is released.
          res.resume();
          next();
          return;
        }

        // Decode as a stream so multi-byte characters split across chunks
        // are not corrupted.
        res.setEncoding('utf8');
        let html = '';
        res.on('data', (chunk) => {
          html += chunk;
        });
        res.on('error', (err) => {
          console.error(`Skipping ${theCurrentId}: ${err.message}`);
          next();
        });
        res.on('end', () => {
          const $ = cheerio.load(html);
          if ($('.icon-vip').length > 0) {
            hasVUser.push(theCurrentId);
          }
          next();
        });
      }).on('error', (err) => {
        // Without this listener a network failure is an uncaught exception.
        console.error(`Skipping ${theCurrentId}: ${err.message}`);
        next();
      });
    }

    // Kick off the sequential scan at the configured start row.
    getV(allUserId, index);
    {
      "dependencies": {},
      "devDependencies": {
        "cheerio": "^1.0.0-rc.2",
        "fs": "0.0.1-security",
        "node-xlsx": "^0.12.1"
      }
    }
  • 相关阅读:
    GDB Practice
    GCC常用命令
    使用VS2010 C#编写ActiveX控件
    [.NET] 使用 .NET Framework 開發 ActiveX Control
    VC2005开发MFC ActiveX控件
    Register DLL and OCX
    COM组件开发实践
    Java Invoke C and C++ Using JNI
    Unable to cast object of type 'System.Int32' to type 'System.String'.
    SharePoint wiki 分类获取所有的
  • 原文地址:https://www.cnblogs.com/1rookie/p/10176216.html
Copyright © 2011-2022 走看看