1. dom解析
DOM是用与平台和语言无关的方式表示XML文档的官方W3C标准。DOM是以层次结构组织的节点或信息片断的集合。这个层次结构允许开发人员在树中寻找特定信息。分析该
结构通常需要加载整个文档和构造层次结构,然后才能做任何工作。由于它是基于信息层次的,因而DOM被认为是基于树或基于对象的。
dom解析优点:
首先,由于树在内存中是持久的,因此可以修改它以便应用程序能对数据和结构作出更改。它还可以在任何时候在树中上下导航,而不是像SAX那样是一次性的处理。DOM使用起来也要简单得多。
实例:
节点信息类
/**
 * Plain data holder for one employee record: a name, a sex code and an age.
 * Equality, hashing and the string form are all derived from those three fields.
 */
public class Employee {

    private String name;
    private String sex;
    private int age;

    /** Creates an employee with all fields left at their defaults (null, null, 0). */
    public Employee() {
        super();
    }

    /** Creates an employee with every field set. */
    public Employee(String name, String sex, int age) {
        super();
        this.name = name;
        this.sex = sex;
        this.age = age;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    /** Combines age, name and sex (in that order) with the multiplier 31; null counts as 0. */
    @Override
    public int hashCode() {
        int hash = 31 + age;
        hash = 31 * hash + (name != null ? name.hashCode() : 0);
        hash = 31 * hash + (sex != null ? sex.hashCode() : 0);
        return hash;
    }

    /** Two employees are equal when they have the same runtime class, age, name and sex. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        Employee that = (Employee) obj;
        return age == that.age
                && sameText(name, that.name)
                && sameText(sex, that.sex);
    }

    /** Null-safe string comparison: two nulls match, a null never matches a non-null. */
    private static boolean sameText(String left, String right) {
        return left == null ? right == null : left.equals(right);
    }

    @Override
    public String toString() {
        return new StringBuilder("Employee [name=")
                .append(name)
                .append(", sex=")
                .append(sex)
                .append(", age=")
                .append(age)
                .append("]")
                .toString();
    }
}
dom解析类:
/**
 * Builds an XML document from {@link Employee} records with the W3C DOM API,
 * writes it to a file, and dumps the node tree of an XML file to standard output.
 */
public class DomXmlParse {

    /**
     * In-memory document populated by {@link #createXml}. Stays {@code null}
     * when {@link #init()} could not obtain a document builder.
     */
    public Document document;

    public DomXmlParse() {
        init();
    }

    /**
     * Creates the empty DOM document held in {@link #document}. A
     * {@link ParserConfigurationException} is reported on the console and
     * leaves {@link #document} unset.
     */
    public void init() {
        try {
            DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
            this.document = documentBuilder.newDocument();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
            System.out.println(e.getMessage());
        }
    }

    /**
     * Builds {@code <rootName>} with one {@code <employee>} child per list entry
     * and serializes the document to {@code fileName}, indented and encoded as gb2312.
     * Transformer and I/O failures are printed, not rethrown.
     *
     * @param list     employees to write; {@code null} or empty yields an empty root element
     * @param rootName tag name of the root element
     * @param fileName path of the file to create or overwrite
     */
    public void createXml(List<Employee> list, String rootName, String fileName) {
        // A DOM document may hold only one root element; drop the one left by an
        // earlier call so this method can be invoked more than once per instance.
        Element previousRoot = this.document.getDocumentElement();
        if (previousRoot != null) {
            this.document.removeChild(previousRoot);
        }
        Element root = this.document.createElement(rootName);
        this.document.appendChild(root);
        if (list != null) {
            for (Employee detail : list) {
                root.appendChild(createEmploy(detail));
            }
        }

        FileOutputStream out = null;
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.ENCODING, "gb2312");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            out = new FileOutputStream(fileName);
            // Hand the transformer a byte stream rather than a Writer: the bytes are
            // then encoded with the declared ENCODING instead of the platform charset,
            // so the file content always matches its XML declaration.
            transformer.transform(new DOMSource(this.document), new StreamResult(out));
            System.out.println("创建xml文件成功");
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Converts one employee into an {@code <employee>} element with
     * {@code <name>}, {@code <sex>} and {@code <age>} children.
     *
     * @param employee record to convert
     * @return the new element, not yet attached to the document tree
     */
    public Element createEmploy(Employee employee) {
        Element employeeElement = this.document.createElement("employee");
        appendTextChild(employeeElement, "name", employee.getName());
        appendTextChild(employeeElement, "sex", employee.getSex());
        appendTextChild(employeeElement, "age", String.valueOf(employee.getAge()));
        return employeeElement;
    }

    /** Appends {@code <tagName>text</tagName>} to {@code parent}; null text is written as empty. */
    private void appendTextChild(Element parent, String tagName, String text) {
        Element child = this.document.createElement(tagName);
        child.appendChild(this.document.createTextNode(text == null ? "" : text));
        parent.appendChild(child);
    }

    /**
     * Parses {@code fileName} and prints type, name and text content of every
     * node in the top three levels of the tree. Parse and I/O failures are
     * reported by printing the exception message.
     *
     * @param fileName location handed to {@link DocumentBuilder#parse(String)}
     */
    public void parseXml(String fileName) {
        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document parsed = db.parse(fileName);
            printChildren(parsed, 3);
            System.out.println("解析完毕");
        } catch (FileNotFoundException e) {
            System.out.println(e.getMessage());
        } catch (ParserConfigurationException e) {
            System.out.println(e.getMessage());
        } catch (SAXException e) {
            System.out.println(e.getMessage());
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
    }

    /** Prints each child of {@code parent} depth-first, descending at most {@code levels} levels. */
    private void printChildren(Node parent, int levels) {
        if (levels <= 0) {
            return;
        }
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            System.out.println("节点类型:" + child.getNodeType());
            System.out.println("节点名:" + child.getNodeName() + ":" + "文本值" + child.getTextContent());
            printChildren(child, levels - 1);
        }
    }
}
测试
/** Employees shared by the tests; filled by {@link #init()}. */
List<Employee> listEmployees = new ArrayList<Employee>();

/** Adds the four sample employees to the shared list. */
@Before
public void init() {
    listEmployees.add(new Employee("张三", "m", 20));
    listEmployees.add(new Employee("李四", "d", 30));
    listEmployees.add(new Employee("王武", "x", 40));
    listEmployees.add(new Employee("大方", "g", 50));
}

/** Writes the sample employees to an XML file through DOM, then dumps that file. */
@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void test1() {
    String separator = File.separator;
    String fileName = "D:" + separator + "test" + separator + "xml" + separator + "employeeDom.xml";
    DomXmlParse domXmlParse = new DomXmlParse();
    domXmlParse.createXml(listEmployees, "employees", fileName);
    domXmlParse.parseXml(fileName);
}
生成的xml文档结构
<?xml version="1.0" encoding="GB2312" standalone="no"?> <employees> <employee> <name>张三</name> <sex>m</sex> <age>20</age> </employee> <employee> <name>李四</name> <sex>d</sex> <age>30</age> </employee> <employee> <name>王武</name> <sex>x</sex> <age>40</age> </employee> <employee> <name>大方</name> <sex>g</sex> <age>50</age> </employee> </employees>
2. sax解析
SAX解析器采用了基于事件的模型,它在解析XML文档的时候可以触发一系列的事件,当发现给定的tag的时候,它可以激活一个回调方法,告诉该方法指定的标签已经找到。
SAX对内存的要求通常会比较低,因为它让开发人员自己来决定所要处理的tag.特别是当开发人员只需要处理文档中所包含的部分数据时,SAX这种扩展能力得到了更好的体
现。但用SAX解析器的时候编码工作会比较困难,而且很难同时访问同一个文档中的多处不同数据。
sax解析事件回调类:
/**
 * SAX handler that turns every element named {@code nodeName} into one map.
 * While such an element is open, its attributes, the attributes of its
 * descendants and the non-blank text of its descendants are stored in the
 * map, keyed by attribute or element name. Finished maps are collected in
 * document order and exposed through {@link #getList()}.
 */
public class SaxHandler extends DefaultHandler {

    /** Record for the currently open {@code nodeName} element; null outside of one. */
    private HashMap<String, String> map = null;

    /** Completed records, in document order. */
    private List<HashMap<String, String>> list = new ArrayList<HashMap<String, String>>();

    /** Name of the element whose text is currently being accumulated, or null. */
    private String currentTag = null;

    /** Text gathered so far for {@link #currentTag}. */
    private final StringBuilder currentValue = new StringBuilder();

    /** Name of the element that delimits one record. */
    private final String nodeName;

    /**
     * @param nodeName qualified name of the element that delimits one record
     */
    public SaxHandler(String nodeName) {
        this.nodeName = nodeName;
    }

    /**
     * @return the records collected so far; empty before anything was parsed
     */
    public List<HashMap<String, String>> getList() {
        return list;
    }

    /** Called once at the start of the document: resets all collected state. */
    @Override
    public void startDocument() throws SAXException {
        list = new ArrayList<HashMap<String, String>>();
        map = null;
        currentTag = null;
        currentValue.setLength(0);
    }

    /**
     * Called for every start tag: opens a new record when the tag is
     * {@code nodeName}, copies the tag's attributes into the open record and
     * starts accumulating text for this tag.
     */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        // Text that preceded this child belongs to the enclosing element; store
        // it before the current tag changes.
        flushText();
        if (name.equals(nodeName)) {
            map = new HashMap<String, String>();
        }
        if (attributes != null && map != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                map.put(attributes.getQName(i), attributes.getValue(i));
            }
        }
        currentTag = name;
    }

    /**
     * Called with character data. A parser may deliver the text of a single
     * element in several consecutive calls, so the chunks are appended to a
     * buffer and stored only once the element ends or a child element starts.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (currentTag != null && map != null) {
            currentValue.append(ch, start, length);
        }
    }

    /**
     * Called for every end tag: stores the accumulated text and, when the tag
     * is {@code nodeName}, moves the finished record into the result list.
     */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        flushText();
        if (name.equals(nodeName)) {
            list.add(map);
            map = null;
        }
        super.endElement(uri, localName, name);
    }

    /**
     * Stores the buffered text under the current tag unless it is blank, then
     * clears the tag and the buffer so later text is not attributed to it.
     */
    private void flushText() {
        if (currentTag != null && map != null) {
            String text = currentValue.toString();
            if (!text.trim().isEmpty()) {
                map.put(currentTag, text);
            }
        }
        currentTag = null;
        currentValue.setLength(0);
    }
}
sax解析类:
/** Static entry point for reading an XML stream into maps with {@link SaxHandler}. */
public abstract class SaxXmlParse {

    /**
     * Parses {@code input} with a SAX parser and returns what a
     * {@link SaxHandler} built for {@code nodeName} collected. The stream is
     * always closed before this method returns, whether or not parsing succeeded.
     *
     * @param input    XML content to read; closed by this method
     * @param nodeName name of the element passed to the {@link SaxHandler} constructor
     * @return the handler's list, or {@code null} when the parser could not be
     *         created or the input could not be read or parsed (the exception
     *         is printed in that case)
     */
    public static List<HashMap<String, String>> _readXml(InputStream input, String nodeName) {
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            SAXParser parser = spf.newSAXParser();
            SaxHandler handler = new SaxHandler(nodeName);
            parser.parse(input, handler);
            return handler.getList();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path so a failed parse does not leak the stream; a
            // failing close is reported but does not discard a successful result.
            if (input != null) {
                try {
                    input.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return null;
    }
}
测试:
<span style="white-space:pre"> </span>@Test public void saxTest(){ String fileName = "D:"+File.separator+"test"+File.separator+"xml"+File.separator+"employeeDom.xml"; try { FileInputStream input = new FileInputStream(fileName); List<HashMap<String, String>> list = SaxXmlParse._readXml(input, "employees"); for(HashMap<String, String> p : list){ System.out.println(p.toString()); } } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
3. JDom解析
JDOM的目的是成为Java特定文档模型,它简化与XML的交互并且比使用DOM实现更快。由于是第一个Java特定模型,JDOM一直得到大力推广和促进。正在考虑通过“Java规
范请求JSR-102”将它最终用作“Java标准扩展”。从2000年初就已经开始了JDOM开发。
JDOM与DOM主要有两方面不同。首先,JDOM仅使用具体类而不使用接口。这在某些方面简化了API,但是也限制了灵活性。第二,API大量使用了Collections类,简化了那些
已经熟悉这些类的Java开发者的使用。
JDOM文档声明其目的是“使用20%(或更少)的精力解决80%(或更多)Java/XML问题”(根据学习曲线假定为20%)。JDOM对于大多数Java/XML应用程序来说当然是有用的,
并且大多数开发者发现API比DOM容易理解得多。JDOM还包括对程序行为的相当广泛检查以防止用户做任何在XML中无意义的事。然而,它仍需要您充分理解XML以便做一些
超出基本的工作(或者甚至理解某些情况下的错误)。这也许是比学习DOM或JDOM接口都更有意义的工作。
JDom解析类:
public class JDomXmlParse { @SuppressWarnings("unchecked") public void parseXml(String fileName) { try { InputStream in = new FileInputStream(fileName); SAXBuilder saxBuilder = new SAXBuilder(); Document document = saxBuilder.build(in); Element root = document.getRootElement(); System.out.println(root.getName()); List<Element> nodes = root.getChildren(); for(Element element : nodes){ String name = element.getChildText("name"); String age = element.getChildText("age"); String sex = element.getChildText("sex"); System.out.println(name); System.out.println(age); System.out.println(sex); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (JDOMException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
测试:
/** Runs the JDOM-based parser against the employee XML file. */
@Test
public void jdomTest() {
    String separator = File.separator;
    String fileName = "D:" + separator + "test" + separator + "xml" + separator + "employeeDom.xml";
    new JDomXmlParse().parseXml(fileName);
}
4. dom4j解析
DOM4J是一个非常非常优秀的Java XML API,具有性能优异、功能强大和极易使用的特点,同时它也是一个开放源代码的软件。如今你可以看到越来越多的Java软件都在使用DOM4J来读写XML。
/** Reads an XML file with dom4j, either by iterating elements or through a visitor. */
public class Dom4jXmlParse {

    /**
     * Prints the root element's name, then for each child of the root its name
     * followed by one {@code name:text} line per grandchild element.
     *
     * @param fileName location of the XML file, as accepted by {@code SAXReader.read(String)}
     * @throws DocumentException if the document cannot be read
     */
    @SuppressWarnings("rawtypes")
    public void parseXml(String fileName) throws DocumentException {
        Element root = load(fileName).getRootElement();
        System.out.println(root.getName());

        Iterator records = root.elementIterator();
        while (records.hasNext()) {
            Element record = (Element) records.next();
            System.out.println(record.getName());

            Iterator fields = record.elementIterator();
            while (fields.hasNext()) {
                Element field = (Element) fields.next();
                System.out.println(field.getName() + ":" + field.getText());
            }
        }
    }

    /**
     * Parses the file using the Visitor pattern.
     *
     * @param fileName location of the XML file, as accepted by {@code SAXReader.read(String)}
     * @throws DocumentException if the document cannot be read
     */
    public void visitorParseXml(String fileName) throws DocumentException {
        load(fileName).accept(new Visitor());
    }

    /** Reads {@code fileName} into a dom4j document with a fresh {@code SAXReader}. */
    private Document load(String fileName) throws DocumentException {
        return new SAXReader().read(fileName);
    }
}
dom4j支持用Visitor模式访问节点和属性,只需编写一个继承VisitorSupport的Visitor类即可。注意,这个Visitor会自动遍历所有子节点;如果调用的是root.accept(MyVisitor),将从root节点开始遍历其所有子节点。
/** dom4j visitor that prints every element and attribute it is handed. */
public class Visitor extends VisitorSupport {

    /** Prints {@code name--text} for a text-only element, otherwise just the name. */
    @Override
    public void visit(Element element) {
        String line = element.getName();
        if (element.isTextOnly()) {
            line = line + "--" + element.getText();
        }
        System.out.println(line);
    }

    /** Prints the attribute as {@code name--text}. */
    @Override
    public void visit(Attribute attribute) {
        String label = attribute.getName();
        String value = attribute.getText();
        System.out.println(label + "--" + value);
    }
}
测试
/**
 * Parses the employee XML file by iterating over its elements with dom4j.
 *
 * @throws DocumentException if the document cannot be read
 */
@Test
public void dom4jTest() throws DocumentException {
    String separator = File.separator;
    String fileName = "D:" + separator + "test" + separator + "xml" + separator + "employeeDom.xml";
    new Dom4jXmlParse().parseXml(fileName);
}

/**
 * Parses the employee XML file using the Visitor approach.
 *
 * @throws DocumentException if the document cannot be read
 */
@Test
public void dom4jVisitorTest() throws DocumentException {
    String separator = File.separator;
    String fileName = "D:" + separator + "test" + separator + "xml" + separator + "employeeDom.xml";
    new Dom4jXmlParse().visitorParseXml(fileName);
}