zoukankan      html  css  js  c++  java
  • jsoup抓取借书记录

      1 package tushuguan; 
      2 
      3 import java.io.IOException;  
      4 import java.util.ArrayList;  
      5 import java.util.HashMap;  
      6 import java.util.Iterator;  
      7 import java.util.List;  
      8 import java.util.Set;  
      9   
     10 import org.apache.http.Header;  
     11 import org.apache.http.HeaderElement;  
     12 import org.apache.http.HttpEntity;  
     13 import org.apache.http.HttpResponse;  
     14 import org.apache.http.NameValuePair;  
     15 import org.apache.http.ParseException;  
     16 import org.apache.http.client.ClientProtocolException;  
     17 import org.apache.http.client.entity.UrlEncodedFormEntity;  
     18 import org.apache.http.client.methods.HttpGet;  
     19 import org.apache.http.client.methods.HttpPost;  
     20 import org.apache.http.client.params.ClientPNames;  
     21 import org.apache.http.impl.client.DefaultHttpClient;  
     22 import org.apache.http.message.BasicNameValuePair;  
     23 import org.apache.http.util.EntityUtils;  
     24 import org.jsoup.Jsoup;  
     25 import org.jsoup.nodes.Document;  
     26 import org.jsoup.nodes.Element;  
     27 import org.jsoup.select.Elements;  
     28   
     29 public class tushuguan {  
     30    private static String LoginUrl = "http://222.200.98.171:81/login.aspx";  
     31    private static String Host = "http://222.200.98.171:81";  
     32    private static String mainUrl = "";  
     33    private static String borrowedBooksUrl = "";  
     34    private static String cookie = "";  
     35    private static String location = "";  
     36  
     37    /** 
     38     * @param args 
     39     */  
     40    public static void main(String[] args) {  
     41        // TODO Auto-generated method stub  
     42        getMyBorrowedBooks();  
     43    }  
     44  
     45    public static void getMyBorrowedBooks() {  
     46        try {  
     47            Document document = Jsoup.parse(login());  
     48            Elements elements1 = document  
     49                    .getElementsContainingOwnText("当前借阅情况和续借");// 通过text关键字找到所要的<a>标签  
     50            String url = elements1.first().attr("href");  
     51            for(int i=1;i<=4;i++){
     52            borrowedBooksUrl = "http://222.200.98.171:81/user/bookborrowedhistory.aspx?page="+i;// 取值和mainUrl进行拼凑组织借阅情况地址  
     53            System.out.println("链接如下:"+borrowedBooksUrl);
     54            getBookBorrowedData(getHtml(borrowedBooksUrl));  
     55            }
     56  
     57        } catch (IOException e) {  
     58            // TODO Auto-generated catch block  
     59            e.printStackTrace();  
     60        }  
     61    }  
     62  
     63    /** 
     64     * 获取借书情况具体数据(List<BookEntity>) 
     65     *  
     66     * @param src 
     67     * @return List<BookEntity> 
     68     */  
     69    private static List<BookEntity> getBookBorrowedData(String src) {  
     70        List<BookEntity> data = new ArrayList<BookEntity>();  
     71        Document document = Jsoup.parse(src);  
     72        Element element = document.select("[id=UserMasterRight]").first()  
     73                .getElementsByTag("table").first();  
     74        Elements elements2 = element.getElementsByTag("tr");  
     75        for (Element temp2 : elements2) {  
     76            Elements elements3 = temp2.getElementsByTag("td");  
     77            BookEntity entity = new tushuguan().new BookEntity()  
     78                    .setIsFullData(elements3.get(4).text())  
     79                    .setData2Return(elements3.get(1).text())  
     80                    .setName(elements3.get(2).text())  
     81                    .setData2Borrowed(elements3.get(0).text());  
     82            data.add(entity);  
     83  
     84        }  
     85        data.remove(0);  
     86        System.out.println("借书情况
    ");  
     87  
     88        for (BookEntity temp : data) {  
     89            System.out.println(temp.getName() + "
    " + temp.getData2Borrowed()  
     90                    + "
    " + temp.getData2Return() + "
    "  
     91                    + temp.getIsFullData());  
     92        }  
     93        return data;  
     94  
     95    }  
     96  
     97    /** 
     98     * 图书馆登陆 
     99     *  
    100     * @param context 
    101     * @return 返回登陆后的界面Html代码 
    102     * @throws ClientProtocolException 
    103     * @throws IOException 
    104     */  
    105    public static String login() throws ClientProtocolException, IOException {  
    106        List<NameValuePair> parmasList = new ArrayList<NameValuePair>();  
    107        parmasList = initLoginParmas("3113003802", "092137");  
    108        HttpPost post = new HttpPost(LoginUrl);  
    109        post.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS, false);  
    110        // 阻止自动重定向,目的是获取第一个ResponseHeader的Cookie和Location  
    111        post.setHeader("Content-Type",  
    112                "application/x-www-form-urlencoded;charset=gbk");  
    113        // 设置编码为GBK  
    114        post.setEntity(new UrlEncodedFormEntity(parmasList, "GBK"));  
    115        HttpResponse response = new DefaultHttpClient().execute(post);  
    116        cookie = response.getFirstHeader("Set-Cookie").getValue();  
    117        // 取得cookie并保存起来  
    118        // System.out.println("cookie= " + cookie);  
    119        location = response.getFirstHeader("Location").getValue();  
    120        // 重定向地址,目的是连接到主页  
    121        mainUrl = Host + location;  
    122        // 构建主页地址  
    123        String html = getHtml(mainUrl);  
    124        return html;  
    125  
    126    }  
    127  
    128    /** 
    129     * 获取网页HTML源代码 
    130     *  
    131     * @param url 
    132     * @return  
    133     * @throws ParseException 
    134     * @throws IOException 
    135     */  
    136  
    137    private static String getHtml(String url) throws ParseException,  
    138            IOException {  
    139        // TODO Auto-generated method stub  
    140        HttpGet get = new HttpGet(url);  
    141        if ("" != cookie) {  
    142            get.addHeader("Cookie", cookie);  
    143        }  
    144        HttpResponse httpResponse = new DefaultHttpClient().execute(get);  
    145        HttpEntity entity = httpResponse.getEntity();  
    146        return EntityUtils.toString(entity);  
    147    }  
    148  
    149    /** 
    150     * 初始化参数 
    151     *  
    152     * @param userName 
    153     * @param passWord 
    154     * @return  
    155     * @throws ParseException 
    156     * @throws IOException 
    157     */  
    158    public static List<NameValuePair> initLoginParmas(String userName,  
    159            String passWord) throws ParseException, IOException {  
    160        List<NameValuePair> parmasList = new ArrayList<NameValuePair>();  
    161        HashMap<String, String> parmasMap = getLoginFormData(LoginUrl);  
    162        Set<String> keySet = parmasMap.keySet();  
    163  
    164        for (String temp : keySet) {  
    165            if (temp.contains("Username")) {  
    166                parmasMap.put(temp, userName);  
    167            } else if (temp.contains("txtPas")) {  
    168                parmasMap.put(temp, passWord);  
    169            }  
    170        }  
    171  
    172        Set<String> keySet2 = parmasMap.keySet();  
    173        System.out.println("表单内容:");  
    174        for (String temp : keySet2) {  
    175            System.out.println(temp + " = " + parmasMap.get(temp));  
    176        }  
    177        for (String temp : keySet2) {  
    178            parmasList.add(new BasicNameValuePair(temp, parmasMap.get(temp)));  
    179        }  
    180  
    181        // System.out.println("initParams 
    " + parmasMap);  
    182  
    183        return parmasList;  
    184  
    185    }  
    186  
    187    /** 
    188     * 获取登录表单input内容 
    189     *  
    190     * @param url 
    191     * @return  
    192     * @throws IOException 
    193     * @throws ParseException 
    194     */  
    195    public static HashMap<String, String> getLoginFormData(String url)  
    196            throws ParseException, IOException {  
    197        Document document = Jsoup.parse(getHtml(url));  
    198        Elements element1 = document.getElementsByTag("form");// 找出所有form表单  
    199        Element element = element1.select("[method=post]").first();// 筛选出提交方法为post的表单  
    200        Elements elements = element.select("input[name]");// 把表单中带有name属性的input标签取出  
    201        HashMap<String, String> parmas = new HashMap<String, String>();  
    202        for (Element temp : elements) {  
    203            parmas.put(temp.attr("name"), temp.attr("value"));// 把所有取出的input,取出其name,放入Map中  
    204        }  
    205        return parmas;  
    206    }  
    207  
    208    class BookEntity {  
    209        /** 
    210         * 书名 
    211         *  
    212         */  
    213        private String name;  
    214        /** 
    215         * 可借数 
    216         */  
    217        private String leandableNum;  
    218        /** 
    219         * 索引号 
    220         */  
    221        private String callNumber;  
    222        /** 
    223         * 作者 
    224         */  
    225        private String writer;  
    226        /** 
    227         * 出版社 
    228         */  
    229        private String publisher;  
    230        /** 
    231         * 还书时间 
    232         */  
    233        private String data2Return;  
    234        /** 
    235         * 借书时间 
    236         */  
    237        private String data2Borrowed;  
    238        /** 
    239         * 是否续满 
    240         */  
    241        private String isFullData;  
    242  
    243        public BookEntity() {  
    244  
    245        }  
    246  
    247        public String getName() {  
    248            return name;  
    249        }  
    250  
    251        public String getLeandableNum() {  
    252            return leandableNum;  
    253        }  
    254  
    255        public String getCallNumber() {  
    256            return callNumber;  
    257        }  
    258  
    259        public String getWriter() {  
    260            return writer;  
    261        }  
    262  
    263        public String getPublisher() {  
    264            return publisher;  
    265        }  
    266  
    267        public BookEntity setName(String name) {  
    268            this.name = name;  
    269            return this;  
    270        }  
    271  
    272        public BookEntity setLeandableNum(String leandableNum) {  
    273            this.leandableNum = leandableNum;  
    274            return this;  
    275        }  
    276  
    277        public BookEntity setCallNumber(String callNumber) {  
    278            this.callNumber = callNumber;  
    279            return this;  
    280        }  
    281  
    282        public BookEntity setWriter(String writer) {  
    283            this.writer = writer;  
    284            return this;  
    285        }  
    286  
    287        public BookEntity setPublisher(String publisher) {  
    288            this.publisher = publisher;  
    289            return this;  
    290        }  
    291  
    292        public String getData2Return() {  
    293            return data2Return;  
    294        }  
    295  
    296        public String getData2Borrowed() {  
    297            return data2Borrowed;  
    298        }  
    299  
    300        public String getIsFullData() {  
    301            return isFullData;  
    302        }  
    303  
    304        public BookEntity setData2Return(String data2Return) {  
    305            this.data2Return = data2Return;  
    306            return this;  
    307        }  
    308  
    309        public BookEntity setData2Borrowed(String data2Borrowed) {  
    310            this.data2Borrowed = data2Borrowed;  
    311            return this;  
    312        }  
    313  
    314        public BookEntity setIsFullData(String isFullData) {  
    315            this.isFullData = isFullData;  
    316            return this;  
    317        }  
    318  
    319    }  
    320  
    321 }  

    结果如下:

    表单内容:
    __VIEWSTATE = /wEPDwULLTE0MjY3MDAxNzcPZBYCZg9kFgoCAQ8PFgIeCEltYWdlVXJsBRt+XGltYWdlc1xoZWFkZXJvcGFjNGdpZi5naWZkZAICDw8WAh4EVGV4dAUt5bm/5Lic5bel5Lia5aSn5a2m5Zu+5Lmm6aaG5Lmm55uu5qOA57Si57O757ufZGQCAw8PFgIfAQUcMjAxNeW5tDEy5pyIMjHml6UgIOaYn+acn+S4gGRkAgQPZBYEZg9kFgQCAQ8WAh4LXyFJdGVtQ291bnQCCBYSAgEPZBYCZg8VAwtzZWFyY2guYXNweAAM55uu5b2V5qOA57SiZAICD2QWAmYPFQMTcGVyaV9uYXZfY2xhc3MuYXNweAAM5YiG57G75a+86IiqZAIDD2QWAmYPFQMOYm9va19yYW5rLmFzcHgADOivu+S5puaMh+W8lWQCBA9kFgJmDxUDCXhzdGIuYXNweAAM5paw5Lmm6YCa5oqlZAIFD2QWAmYPFQMUcmVhZGVycmVjb21tZW5kLmFzcHgADOivu+iAheiNkOi0rWQCBg9kFgJmDxUDE292ZXJkdWVib29rc19mLmFzcHgADOaPkOmGkuacjeWKoWQCBw9kFgJmDxUDEnVzZXIvdXNlcmluZm8uYXNweAAP5oiR55qE5Zu+5Lmm6aaGZAIID2QWAmYPFQMbaHR0cDovL2xpYnJhcnkuZ2R1dC5lZHUuY24vAA/lm77kuabppobpppbpobVkAgkPZBYCAgEPFgIeB1Zpc2libGVoZAIDDxYCHwJmZAIBD2QWBAIDD2QWBAIBDw9kFgIeDGF1dG9jb21wbGV0ZQUDb2ZmZAIHDw8WAh8BZWRkAgUPZBYGAgEPEGRkFgFmZAIDDxBkZBYBZmQCBQ8PZBYCHwQFA29mZmQCBQ8PFgIfAQWlAUNvcHlyaWdodCAmY29weTsyMDA4LTIwMDkuIFNVTENNSVMgT1BBQyA0LjAxIG9mIFNoZW56aGVuIFVuaXZlcnNpdHkgTGlicmFyeS4gIEFsbCByaWdodHMgcmVzZXJ2ZWQuPGJyIC8+54mI5p2D5omA5pyJ77ya5rex5Zyz5aSn5a2m5Zu+5Lmm6aaGIEUtbWFpbDpzenVsaWJAc3p1LmVkdS5jbmRkZBFPBFe3T/k7AJVSx8iKDmNVbdHT
    ctl00$ContentPlaceHolder1$txtPas_Lib = 你猜你猜
    ctl00$ContentPlaceHolder1$btnLogin_Lib = 登录
    ctl00$ContentPlaceHolder1$txtlogintype = 0
    ctl00$ContentPlaceHolder1$txtUsername_Lib = 3113003802
    __EVENTVALIDATION = /wEWBQKs47i8AwKOmK5RApX9wcYGAsP9wL8JAqW86pcIDebecgohSzUlmvgecvTU4k49zAw=
    链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=1
    借书情况
    
    回乡记 [专著]/贺雪峰主编
    2015-09-01
    2015-10-15
    A3226253
    土木工程CAD基础 [专著]:AutoCAD软件基础教程=CAD foundation of civil engineering:AutoCAD software basic course/邓芃主编
    2015-07-20
    2015-10-15
    A3138201
    李光耀传 [专著]/凌翔著
    2015-07-20
    2015-10-15
    A3210306
    工程CAD基础理论与上机操作习题集 [专著]/于奕峰,杨松林主编
    2015-07-20
    2015-10-15
    A3258522
    消失的17岁 [专著]/(美) 诺瓦·伦·苏玛著=17 & gone/Nova Ren Suma;刘丽洁译
    2015-06-03
    2015-09-01
    A3213437
    汤姆叔叔的小屋 [专著]=Uncle tom's cabin:插图·中文导读英文版/(美)比彻·斯托夫人著;王勋,纪飞等编译
    2015-03-27
    2015-06-01
    A3002490
    商务口译 [专著]=Business interpreting/刘建珠主编
    2015-03-27
    2015-06-01
    A3003500
    2014年季度精选集 [汇编]·春季卷/《读者·乡土人文版》编辑部主编
    2015-03-27
    2015-06-01
    A3210150
    可口可乐不规则营销 [专著]/(美)洛威尔著;龙文元译
    2015-03-17
    2015-06-16
    A1501833
    工程经济学 [专著]/关罡, 郝彤主编
    2015-03-17
    2015-04-29
    A3109697
    链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=2
    借书情况
    
    讴歌母爱 关注人生 [专著]:冰心小说全集/冰 心著
    2015-03-03
    2015-06-01
    A5143376
    林徽因小说:九十九度中/林徽因[著];陈学勇编选
    2015-03-03
    2015-06-01
    A5188772
    骆驼祥子·黑白李 [专著]/老舍著
    2015-03-03
    2015-06-01
    A0957524
    1937年的爱情 [专著]/叶兆言著
    2014-11-27
    2015-01-10
    A1509614
    理工大风流往事 [专著]/zt著
    2014-11-27
    2014-12-16
    A1847222
    酒殇 [专著]:一个酒业王国的兴衰/杨小凡著
    2014-11-27
    2015-01-10
    A1948680
    那时年少 [专著]/一草著
    2014-11-27
    2014-12-16
    A2992422
    不能承受的生命之轻 [专著]/(捷克斯洛伐克)米兰·昆德拉(Milan Kundera)著=L'insoutenable legerete de l'etre/许钧译
    2014-11-18
    2015-01-10
    A0520872
    读者精华本 [汇编]/万文海主编
    2014-11-18
    2015-01-10
    A1547276
    谁在让子弹飞 [专著]/曹保印著
    2014-11-18
    2014-12-16
    A3147373
    链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=3
    借书情况
    
    孤独是不人道的 [专著]/郭鹏著
    2014-11-18
    2014-12-16
    A3147367
    且听风吟 [专著]/(日)村上春树著;林少华译
    2014-09-23
    2014-11-10
    A2516969
    可怕的巧合 [专著]/石岩编著
    2014-09-23
    2014-11-13
    A3158433
    你好,总统 [专著]:乌戈·查韦斯与他的委内瑞拉=Comandante:inside Hugo Chavez's venezuela/(英)洛里·卡洛尔(Rory Carroll)著;徐天鹏译
    2014-09-23
    2014-11-13
    A3129490
    肝胆相照 [专著]:吴孟超传/方鸿辉著
    2014-09-23
    2014-11-10
    A3139385
    林徽因经典作品 [专著]:你是人间的四月天九十九度中/林徽因著
    2014-05-19
    2014-07-10
    A2386519
    梁思成的山河岁月 [专著]/林与舟编著
    2014-05-19
    2014-05-27
    A1210449
    人物中国 [汇编]/龚莉主编;《人物中国》编委会编
    2014-05-19
    2014-07-10
    A2603584
    百年大案追踪 [专著]/郭学德,崔爱鹏,李海涛著
    2014-04-24
    2014-06-11
    A0283139
    聚焦名人名案 [专著]/窦欣平,叶知秋著
    2014-04-24
    2014-06-11
    A0547714
    链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=4
    借书情况
    
    孙子兵法经典故事 [专著]/李济生编著
    2014-04-24
    2014-06-17
    A0565277
    危险游戏 [汇编]:典型犯罪案例评说/郭春孚,张翔鹰主编
    2014-04-24
    2014-06-03
    A1360621
    家庭常用药物手册 [专著]/白禾夏主编
    2014-03-17
    2014-03-25
    A0483737
    药用观赏植物栽培与利用 [专著]/张永清编著
    2014-03-17
    2014-04-13
    A0614935
    排毒不如无毒 [专著]:远离生活中的有毒物质/(美) 黛布拉·林恩·戴德著 ;常媛译=Toxic free: how to protect your health and home from the chemicals that are making you sick
    2014-03-17
    2014-03-25
    A3116154
    新版以案说法 [专著]/曾宪义总主编
    2014-02-27
    2014-03-17
    A1595640
    飞去的诗人:徐志摩传 [专著]/展望之,张方晦著
    2014-02-25
    2014-03-20
    A8152588
    高四凶猛 [专著]/耿萧著
    2014-02-25
    2014-02-27
    A0547642
    

    其实我是转载改了点东西而已:http://my.oschina.net/dfsfsdf/blog/116279?fromerr=jQsroe5A

    如果,您认为阅读这篇博客让您有些收获,不妨拿出手机微信扫一扫

    您的资助是我最大的动力!
    金额随意,欢迎来赏!

  • 相关阅读:
    JavaScript完整总结
    vue引入iframe的父子页面的数据传递
    随笔开发中笔记
    关于表格(table)的操作
    es6--6.字符串相关
    ES6--5.数组4个新增方法
    ES6--4.解构赋值
    sublime 远程连接服务器编辑
    ajaxSubmit
    修改 debian 时区
  • 原文地址:https://www.cnblogs.com/w1570631036/p/5065140.html
Copyright © 2011-2022 走看看