@Test public void gogo() throws IOException{ InputStream in = this.getClass().getClassLoader() .getResourceAsStream("content.html"); byte[] content = new byte[in.available()]; in.read(content); Document document; org.dom4j.Document document2; InputStream inputStream = null; try { inputStream = new ByteArrayInputStream(content); Tidy tidy = new Tidy(); init(tidy); document = tidy.parseDOM(inputStream, null); DOMReader domReader = new DOMReader(); document2 = domReader.read(document); XPath path = DocumentHelper.createXPath("/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table/tbody/tr[2]/td/div[4]"); //xpath 路径 Node node = path.selectSingleNode(document2); System.out.println(node.getUniquePath()); //xpath System.out.println(node.getNodeTypeName()); // Element System.out.println(node.getName()); //div System.out.println(node.asXML()); // 原始文本 System.out.println("kjl"+node.getText()); //null System.out.println(node.getStringValue()); //除去 标签 System.out.println(node.getPath()); // 不知道和 uniquepath 什么关系 System.out.println(node.getParent().asXML()); // 获取 dom 父节点 } finally { Closeables.close(inputStream, false); // guava api 这个测试时完全可以不要,只是我在测试时刚好加了这个dependency } }
测试环境:Maven、Guava、dom4j。这里好像无法上传文件,所以就不上传了。