zoukankan      html  css  js  c++  java
  • java爬虫(八)使用node.js获取network中api接口内信息并用java的jsoup重写该方法

    1.电脑安装node.js  点击官网传送门

    2.在浏览器中(我用了一个谷歌内核的浏览器)找到自己url api 右键-->copy-->copy as nodejs fetch

    (打开网页的审查元素后如果找不到url,尝试刷新页面)

     3.将代码粘贴进js文件中(我用的记事本)需要对代码进行简单的修改

    修改的内容为:头部定义fetch变量,尾部输出结果,代码如下:

    粘贴出来的代码:

    // Request exactly as copied from the browser via "Copy as Node.js fetch".
    // As pasted it cannot run on its own: `fetch` is not defined in this snippet
    // and the returned promise is never consumed, so nothing is printed.
    fetch("http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/consult/queryConsultingList.do", {
      "headers": {
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "x-requested-with": "XMLHttpRequest",
        // Cookie header copied from the browser request.
        // NOTE(review): looks like it carries live session identifiers — confirm before sharing.
        "cookie": "EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676"
      },
      "referrer": "http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/index.do",
      "referrerPolicy": "no-referrer-when-downgrade",
      // Form-encoded POST body: filter and paging fields.
      "body": "consultZone=ALL&search=&consultState=0&pageNumber=1&pageSize=10",
      "method": "POST",
      "mode": "cors"
    });

    修改后的代码:

    // Runnable version of the copied request: defines `fetch` at the top and
    // prints the parsed JSON at the end.
    // `require` only works with node-fetch v2 (v3 is published as ES modules only).
    const fetch = require('node-fetch');

    fetch("http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/consult/queryConsultingList.do", {
      "headers": {
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "x-requested-with": "XMLHttpRequest",
        // Cookie header copied from the browser request; the request stops
        // working once these cookies expire.
        "cookie": "EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676"
      },
      "referrer": "http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/index.do",
      "referrerPolicy": "no-referrer-when-downgrade",
      "body": "consultZone=ALL&search=&consultState=0&pageNumber=1&pageSize=10",
      "method": "POST",
      "mode": "cors"
    })
      .then((res) => {
        // fetch resolves on any HTTP status, so reject non-2xx replies explicitly.
        if (!res.ok) {
          throw new Error(`Request failed: ${res.status} ${res.statusText}`);
        }
        return res.json();
      })
      .then((json) => console.log(json))
      .catch((err) => {
        // Network errors, non-2xx statuses and non-JSON bodies (e.g. a login page
        // returned after the cookies expire) all end up here instead of becoming
        // an unhandled promise rejection.
        console.error(err);
        process.exitCode = 1;
      });

    4.运行代码

    (初次使用会报没有node-fetch这个包,直接在cmd中用命令安装:npm install node-fetch@2 即可。注意:node-fetch 从 v3 起只支持 ES Module,无法再用 require 引入,所以这里安装 v2)

    cmd命令行中有两种node.js的运行方式

    第一种:用node命令进入环境 然后逐句编写运行

    第二种:直接运行写好的node.js文件

    在cmd中执行 node 文件名.js 即可(node 和文件名之间是空格)

     5.使用jsoup重写该方法

    通过不停地注释代码查看运行结果,我们发现了很多冗余参数,经过删减后的node.js代码如下

    // Minimal request: only the headers and form fields that this version keeps
    // after trimming the redundant ones.
    // `require` only works with node-fetch v2 (v3 is published as ES modules only).
    const fetch = require('node-fetch');

    fetch("http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/consult/queryConsultingList.do", {
      "headers": {
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        // Cookie header copied from the browser request; the request stops
        // working once these cookies expire.
        "cookie": "EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676"
      },
      "body": "consultState=0&pageNumber=1&pageSize=10",
      "method": "POST",
    })
      .then((res) => {
        // fetch resolves on any HTTP status, so reject non-2xx replies explicitly.
        if (!res.ok) {
          throw new Error(`Request failed: ${res.status} ${res.statusText}`);
        }
        return res.json();
      })
      .then((json) => console.log(json))
      .catch((err) => {
        // Network errors, non-2xx statuses and non-JSON bodies all end up here
        // instead of becoming an unhandled promise rejection.
        console.error(err);
        process.exitCode = 1;
      });

    使用jsoup转写后如下:

    转写过程中遇到的问题:

    1.请求中的 Form Data(表单数据)需要用 .data( ) 逐项赋值

    2.报错:Jsoup Unhandled content type 原因是接口响应的Content-Type(例如application/json)不属于jsoup默认可解析的text/*或XML类型

    解决方法:在连接上调用 .ignoreContentType(true)(这是jsoup的连接设置,而不是请求头)

    package debug;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    
    public class Myhttpclient {
        
        public static void querryhtml(String loginUrl) throws Exception{
    
            Document document = Jsoup.connect(loginUrl)
                    // 手动设置cookies
                    .header("Content-Type","application/x-www-form-urlencoded; charset=UTF-8")
                    .ignoreContentType(true)
                    .header("Cookie","EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676")
    //                .header("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198Safari/537.36")
                    .data("consultState","0")
                    .data("pageSize","10")
                    .data("pageNumber","1")
                    .post();    
            
            System.out.println(document);    
        }
    }

    运行结果:

     6.目前唯一的问题就是cookies的有效时间问题,解决方法:通过定时登录主页获取cookies来传递给该api

  • 相关阅读:
    css+ul+li方式 横向再纵向排列
    b表中有的塞给a表
    .net remoting的两种实现方式 cow
    Prism之Module cow
    2012项目总结 cow
    WCF学习笔记 cow
    也谈委托,事件和回调 cow
    理清apply(),call()的区别和关系 cow
    CLR via C#学习之线程栈,托管堆,值类型和引用类型 cow
    细说系列笔记 cow
  • 原文地址:https://www.cnblogs.com/StarZhai/p/14210851.html
Copyright © 2011-2022 走看看