zoukankan      html  css  js  c++  java
  • Html解析本地搜网站

    业务类

     1 package code.lxy.test;
     2 
     3 import java.io.File;
     4 import java.io.FileNotFoundException;
     5 import java.io.FileOutputStream;
     6 import java.io.PrintWriter;
     7 
     8 import org.htmlparser.Node;
     9 import org.htmlparser.NodeFilter;
    10 import org.htmlparser.Parser;
    11 import org.htmlparser.tags.Div;
    12 import org.htmlparser.tags.LinkTag;
    13 import org.htmlparser.util.NodeList;
    14 import org.htmlparser.util.ParserException;
    15 
    16 public class HtmlParserDemo {
    17     public static void parserHtml(String htmlToParser)
    18             throws FileNotFoundException {
    19         PrintWriter writer = new PrintWriter(new FileOutputStream(new File(
    20                 "d:/test.text")));
    21         Parser parser = new Parser();
    22         try {
    23             parser.setURL(htmlToParser);
    24             parser.setEncoding("UTF-8");
    25             NodeFilter filter = new NodeFilter() {
    26                 @Override
    27                 public boolean accept(Node node) {
    28                     // TODO Auto-generated method stub
    29                     if (node instanceof Div) {
    30                         Div divNode = (Div) node;
    31                         // System.out.println(divNode.getAttribute("class"));
    32                         if (divNode.getAttribute("class") != null) {
    33                             if (divNode.getAttribute("class").endsWith("zuo01_bt")||divNode.getAttribute("class").endsWith("zuo01_con")) {
    34                                 return true;
    35                             }
    36                         }
    37                     }
    38                     return false;
    39                 }
    40             };
    41             NodeList nodelist = parser.extractAllNodesThatMatch(filter);
    42             for (int i = 0; i < nodelist.size(); i++) {
    43                 /*Div divNode=(Div) nodelist.elementAt(i);
    44                 System.out.println(divNode.toPlainTextString());*/
    45                 Div divnode=(Div) nodelist.elementAt(i);
    46                 String test=divnode.getAttribute("class");
    47                 if(divnode.getAttribute("class").equals("zuo01_bt"))
    48                 {
    49                     LinkTag linkTag=(LinkTag) divnode.childAt(1);
    50                     System.out.println(linkTag.getAttribute("title"));
    51                 }else{
    52                     System.out.println(divnode.toPlainTextString());
    53                 }
    54             }
    55             writer.close();
    56         } catch (ParserException e) {
    57             // TODO Auto-generated catch block
    58             e.printStackTrace();
    59         }
    60     }
    61 }

    测试类

    package code.lxy.main;
    
    import java.io.FileNotFoundException;
    
    import code.lxy.test.HtmlParserDemo;
    
    public class MainClass {

        /**
         * Entry point: runs {@code HtmlParserDemo.parserHtml} against a fixed
         * listing page URL.
         *
         * @param args command-line arguments (unused)
         * @throws FileNotFoundException propagated from
         *         {@code HtmlParserDemo.parserHtml}
         */
        public static void main(String[] args) throws FileNotFoundException {
            final String listingUrl = "http://www.locoso.com/cate/0sts2";
            HtmlParserDemo.parserHtml(listingUrl);
        }

    }

    结果输出显示

  • 相关阅读:
    C面试复习笔记
    Java面试复习笔记
    Jdk1.6 HTTPS访问问题解决办法
    百度地图轨迹回放,自定义路书,边走边画线
    简单的代码生成小工具(支持模板)
    card布局解决复杂操作的布局问题
    tabpanel如何隐藏页签表头以及基本用法总结
    ExtJS4.2下将表单元素放在菜单时不能进行拷贝的问题解决办法
    照片元数据信息以及在照片中写入gps信息
    带名称空间的xml数据查询
  • 原文地址:https://www.cnblogs.com/dependmyse/p/3020544.html
Copyright © 2011-2022 走看看