SAX、DOM是两种对XML文档进行解析的方式(只是接口,没有具体实现),所以只有它们是无法解析XML文档的;JAXP只是API,它进一步封装了SAX、DOM两种接口,并且提供了DocumentBuilderFactory/DocumentBuilder和SAXParserFactory/SAXParser(默认使用Xerces解析器)。
如对DOM解析器还有疑问,请查看这里。目前在Java中用于解析XML的技术很多,主流的有DOM、SAX、JDOM、DOM4J,下面分别介绍这四种方式如何操作XML。
university.xml
<?xml version="1.0" encoding="UTF-8"?> <university name="pku"> <college name="c1"> <class name="class1"> <student name="stu1" sex='male' age="21" /> <student name="stu2" sex='female' age="20" /> <student name="stu3" sex='female' age="20" /> </class> <class name="class2"> <student name="stu4" sex='male' age="19" /> <student name="stu5" sex='female' age="20" /> <student name="stu6" sex='female' age="21" /> </class> </college> <college name="c2"> <class name="class3"> <student name="stu7" sex='male' age="20" /> </class> </college> <college name="c3"> </college> </university>
DOM读写XML、输出属性值:此方法我已经在上篇随笔中详细介绍过了,点这里查看。
TestDom.java
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
 * Reads and modifies {@code university.xml} with the JAXP DOM API.
 *
 * @author whwang
 */
public class TestDom {

    /** Name of the sample document, looked up through the class loader. */
    private static final String SOURCE_XML = "university.xml";

    /** File that {@link #write()} saves the modified document to. */
    private static final String TARGET_XML = "src/dom-modify.xml";

    /**
     * Prints the {@code name} of the root element, then of every college and class,
     * and the {@code name}, {@code sex} and {@code age} of every student, to
     * {@code System.err}. Parsing and I/O failures are reported with a stack trace.
     */
    public static void read() {
        try {
            Document doc = loadDocument();
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            System.err.println(root.getAttribute("name"));
            for (Element college : elementChildren(root)) {
                System.err.println(" " + college.getAttribute("name"));
                for (Element clazz : elementChildren(college)) {
                    System.err.println(" " + clazz.getAttribute("name"));
                    for (Element student : elementChildren(clazz)) {
                        System.err.print(" " + student.getAttribute("name"));
                        System.err.print(" " + student.getAttribute("sex"));
                        System.err.println(" " + student.getAttribute("age"));
                    }
                }
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the sample document, modifies it and saves it as {@code src/dom-modify.xml}.
     *
     * <p>The root {@code name} becomes {@code tsu}; colleges {@code c1} and {@code c2}
     * are removed; college {@code c3} gains a {@code class} named {@code c4}; a new
     * college {@code c5} containing the text {@code text} is appended. The absolute
     * path of the written file is printed to {@code System.out}.
     */
    public static void write() {
        try {
            Document doc = loadDocument();
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            root.setAttribute("name", "tsu");

            // elementChildren() returns a snapshot, so removing a college here cannot
            // shift the indices of the colleges that are still to be visited (which is
            // what happens when the live NodeList is walked by index while removing).
            for (Element college : elementChildren(root)) {
                String collegeName = college.getAttribute("name");
                if ("c1".equals(collegeName) || "c2".equals(collegeName)) {
                    root.removeChild(college);
                } else if ("c3".equals(collegeName)) {
                    Element newChild = doc.createElement("class");
                    newChild.setAttribute("name", "c4");
                    college.appendChild(newChild);
                }
            }

            Element addCollege = doc.createElement("college");
            addCollege.setAttribute("name", "c5");
            root.appendChild(addCollege);
            Text text = doc.createTextNode("text");
            addCollege.appendChild(text);

            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            File file = new File(TARGET_XML);
            // FileOutputStream creates the file or truncates an existing one.
            FileOutputStream out = new FileOutputStream(file);
            try {
                transformer.transform(new DOMSource(doc), new StreamResult(out));
            } finally {
                out.close();
            }
            System.out.println(file.getAbsolutePath());
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }

    /** Runs the read demo; {@link #write()} is not invoked here and must be called explicitly. */
    public static void main(String[] args) {
        read();
    }

    /**
     * Parses the sample document from the classpath and closes the stream afterwards.
     *
     * @throws FileNotFoundException if the class loader cannot find the resource
     */
    private static Document loadDocument()
            throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        InputStream in = TestDom.class.getClassLoader().getResourceAsStream(SOURCE_XML);
        if (in == null) {
            // Passing null on to parse() would raise an unchecked IllegalArgumentException.
            throw new FileNotFoundException(SOURCE_XML + " not found on the classpath");
        }
        try {
            return builder.parse(in);
        } finally {
            in.close();
        }
    }

    /** Returns a snapshot of the direct children of {@code parent} that are elements. */
    private static List<Element> elementChildren(Node parent) {
        List<Element> elements = new ArrayList<Element>();
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                elements.add((Element) child);
            }
        }
        return elements;
    }
}
SAX不用将整个文档加载到内存,它是基于事件驱动的API(Observer模式),用户只需要注册自己感兴趣的事件即可。SAX提供EntityResolver、DTDHandler、ContentHandler、ErrorHandler接口,分别用于监听解析实体事件、DTD处理事件、正文处理事件和处理出错事件。与AWT类似,SAX还为这4个接口提供了一个默认实现类DefaultHandler(这里的默认实现,其实就是空方法),一般只要继承DefaultHandler,重写自己感兴趣的事件处理方法即可。
TestSAX.java
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Parses {@code university.xml} with a JAXP SAX parser and prints every element.
 *
 * @author whwang
 */
public class TestSAX {

    /** Name of the sample document, looked up through the class loader. */
    private static final String SOURCE_XML = "university.xml";

    public static void main(String[] args) {
        read();
        write();
    }

    /**
     * Streams the sample document through {@link MyHandler}. Parser, SAX and I/O
     * failures (including a missing resource) are reported with a stack trace.
     */
    public static void read() {
        try {
            SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
            InputStream in = TestSAX.class.getClassLoader().getResourceAsStream(SOURCE_XML);
            if (in == null) {
                // Passing null on to parse() would raise an unchecked IllegalArgumentException.
                throw new FileNotFoundException(SOURCE_XML + " not found on the classpath");
            }
            try {
                parser.parse(in, new MyHandler());
            } finally {
                in.close();
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Only prints a notice: this demo performs no write with SAX. */
    public static void write() {
        System.err.println("纯SAX对于写操作无能为力");
    }
}

/**
 * Handler that logs document start/end and each element with its attributes to
 * {@code System.err}. Every other callback is listed explicitly but only delegates
 * to {@link DefaultHandler}.
 */
class MyHandler extends DefaultHandler {

    // ---- callbacks with custom behaviour ----

    @Override
    public void startDocument() throws SAXException {
        System.err.println("开始解析文档");
    }

    @Override
    public void endDocument() throws SAXException {
        System.err.println("解析结束");
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        System.err.print("Element: " + qName + ", attr: ");
        print(attributes);
    }

    // ---- callbacks that simply delegate to DefaultHandler ----

    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws IOException, SAXException {
        return super.resolveEntity(publicId, systemId);
    }

    @Override
    public void notationDecl(String name, String publicId, String systemId) throws SAXException {
        super.notationDecl(name, publicId, systemId);
    }

    @Override
    public void unparsedEntityDecl(String name, String publicId, String systemId,
            String notationName) throws SAXException {
        super.unparsedEntityDecl(name, publicId, systemId, notationName);
    }

    @Override
    public void setDocumentLocator(Locator locator) {
        super.setDocumentLocator(locator);
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        super.startPrefixMapping(prefix, uri);
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
        super.endPrefixMapping(prefix);
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        super.ignorableWhitespace(ch, start, length);
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        super.processingInstruction(target, data);
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
        super.skippedEntity(name);
    }

    @Override
    public void warning(SAXParseException e) throws SAXException {
        super.warning(e);
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
        super.error(e);
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
        super.fatalError(e);
    }

    /**
     * Prints the attributes as {@code [q1 = v1, q2 = v2]} followed by a line break;
     * prints nothing when {@code attrs} is {@code null}.
     */
    private void print(Attributes attrs) {
        if (attrs == null) {
            return;
        }
        StringBuilder line = new StringBuilder("[");
        for (int i = 0; i < attrs.getLength(); i++) {
            if (i > 0) {
                line.append(", ");
            }
            line.append(attrs.getQName(i)).append(" = ").append(attrs.getValue(i));
        }
        line.append("]");
        System.err.println(line);
    }
}
JDOM与DOM非常类似,它是处理XML的纯JAVA API,API大量使用了Collections类,且JDOM仅使用具体类而不使用接口。 JDOM 它自身不包含解析器。它通常使用 SAX2 解析器来解析和验证输入 XML 文档(尽管它还可以将以前构造的 DOM 表示作为输入)。它包含一些转换器以将 JDOM 表示输出成 SAX2 事件流、DOM 模型或 XML 文本文档。
TestJDom.java
import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; import org.jdom.output.XMLOutputter; /** * JDom读写xml * @author whwang */ public class TestJDom { public static void main(String[] args) { read(); write(); } public static void read() { try { boolean validate = false; SAXBuilder builder = new SAXBuilder(validate); InputStream in = TestJDom.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = builder.build(in); // 获取根节点 <university> Element root = doc.getRootElement(); readNode(root, ""); } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } @SuppressWarnings("unchecked") public static void readNode(Element root, String prefix) { if (root == null) return; // 获取属性 List<Attribute> attrs = root.getAttributes(); if (attrs != null && attrs.size() > 0) { System.err.print(prefix); for (Attribute attr : attrs) { System.err.print(attr.getValue() + " "); } System.err.println(); } // 获取他的子节点 List<Element> childNodes = root.getChildren(); prefix += " "; for (Element e : childNodes) { readNode(e, prefix); } } public static void write() { boolean validate = false; try { SAXBuilder builder = new SAXBuilder(validate); InputStream in = TestJDom.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = builder.build(in); // 获取根节点 <university> Element root = doc.getRootElement(); // 修改属性 root.setAttribute("name", "tsu"); // 删除 boolean isRemoved = root.removeChildren("college"); System.err.println(isRemoved); // 新增 Element newCollege = new Element("college"); newCollege.setAttribute("name", "new_college"); Element newClass = new Element("class"); newClass.setAttribute("name", "ccccc"); newCollege.addContent(newClass); root.addContent(newCollege); XMLOutputter out = new 
XMLOutputter(); File file = new File("src/jdom-modify.xml"); if (file.exists()) { file.delete(); } file.createNewFile(); FileOutputStream fos = new FileOutputStream(file); out.output(doc, fos); } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
dom4j是目前XML解析方面最优秀的(Hibernate、Sun的JAXM也都使用dom4j来解析XML),它合并了许多超出基本 XML 文档表示的功能,包括集成的 XPath 支持、XML Schema 支持以及用于大文档或流化文档的基于事件的处理。
TestDom4j.java
import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Element; import org.dom4j.ProcessingInstruction; import org.dom4j.VisitorSupport; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; /** * Dom4j读写xml * @author whwang */ public class TestDom4j { public static void main(String[] args) { read1(); // read2(); write(); } public static void read1() { try { SAXReader reader = new SAXReader(); InputStream in = TestDom4j.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = reader.read(in); Element root = doc.getRootElement(); readNode(root, ""); } catch (DocumentException e) { e.printStackTrace(); } } @SuppressWarnings("unchecked") public static void readNode(Element root, String prefix) { if (root == null) return; // 获取节点的属性 List<Attribute> attrs = root.attributes(); if (attrs != null && attrs.size() > 0) { System.err.print(prefix); for (Attribute attr : attrs) { System.err.print(attr.getValue() + " "); } System.err.println(); } // 获取他的子节点 List<Element> childNodes = root.elements(); prefix += " "; for (Element e : childNodes) { readNode(e, prefix); } } public static void read2() { try { SAXReader reader = new SAXReader(); InputStream in = TestDom4j.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = reader.read(in); doc.accept(new MyVistor()); } catch (DocumentException e) { e.printStackTrace(); } } /** * 写入方法 */ public static void write() { try { // 创建一个xml文档 Document doc = DocumentHelper.createDocument(); Element university = doc.addElement("university"); university.addAttribute("name", "tsu"); // 注释 university.addComment("这个是根节点"); Element college = university.addElement("college"); college.addAttribute("name", "cccccc"); college.setText("text"); File file = new File("src/dom4j-modify.xml"); if 
(file.exists()) { file.delete(); } file.createNewFile(); XMLWriter out = new XMLWriter(new FileWriter(file)); out.write(doc); out.flush(); out.close(); } catch (IOException e) { e.printStackTrace(); } } } class MyVistor extends VisitorSupport { public void visit(Attribute node) { System.out.println("Attibute: " + node.getName() + "=" + node.getValue()); } public void visit(Element node) { if (node.isTextOnly()) { System.out.println("Element: " + node.getName() + "=" + node.getText()); } else { System.out.println(node.getName()); } } @Override public void visit(ProcessingInstruction node) { System.out.println("PI:" + node.getTarget() + " " + node.getText()); } }
由于DOM4J比较重要,我还在网上整理了一些代码:
Dom4j.java
import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.URL; import java.util.Iterator; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentHelper; import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; public class Dom4j { /** * 获取Document对象 * 此方法文件位于 项目根目录 不是src目录 * @param filename 项目根目录下的XML文件 * @return document * */ public static Document load(String filename) { Document document = null; try { SAXReader saxReader = new SAXReader(); document = saxReader.read(new File(filename)); //读取XML文件,获得document对象 } catch (Exception ex) { ex.printStackTrace(); } return document; } /** * 通过url路径获取 Document对象 * 此方式 xml文件位于远程服务器上 * @param url 远程url文件 * @return document对象 */ public static Document load2(URL url) { Document document = null; try { SAXReader saxReader = new SAXReader(); document = saxReader.read(url); //读取XML文件,获得document对象 } catch (Exception ex) { ex.printStackTrace(); } return document; } /** * 获取根节点 * @param doc document对象 * @return 根元素 */ public static Element getRootElement(Document doc){ Element root=null; root=doc.getRootElement(); //获取根节点 return root; } /** * 将document树输出到指定的文件 * @param document document对象 * @param filename 文件名 * @return 布尔值 */ public static boolean doc2XmlFile(Document document, String filename) { boolean flag = true; try { XMLWriter writer = new XMLWriter( new OutputStreamWriter(new FileOutputStream(filename),"UTF-8")); writer.write(document); writer.close(); } catch (Exception ex) { flag = false; ex.printStackTrace(); } System.out.println(flag); return flag; } /** * * * 此方法在本类中无用 ,没有整合 有兴趣的可以自己动手整合一下 * * * Dom4j通过XMLWriter将Document对象表示的XML树写入指定的文件, * 并使用OutputFormat格式对象指定写入的风格和编码方法。 * 
调用OutputFormat.createPrettyPrint()方法可以获得一个默认的pretty print风格的格式对象。 * 对OutputFormat对象调用setEncoding()方法可以指定XML文件的编码方法。 * @param doc * @param out * @param encoding * @throws UnsupportedEncodingException * @throws IOException */ /*public void writeTo(Document doc,OutputStream out, String encoding) throws UnsupportedEncodingException, IOException { OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding("gb2312"); XMLWriter writer = new XMLWriter(System.out,format); writer.write(doc); writer.flush(); }*/ /** * 遍历根标记下的子元素 * @param args */ public static void read(Element root){ for(Iterator i=root.elementIterator();i.hasNext();){ Element element=(Element)i.next(); System.out.print(element.getName()+":"+element.getText()); if(element.getNodeType()==Node.ELEMENT_NODE){ read(element); } } } /** * 写入操作 * @param fileName */ public static void write(String fileName){ Document document=DocumentHelper.createDocument();//建立document对象,用来操作xml文件 Element booksElement=document.addElement("books");//建立根节点 booksElement.addComment("This is a test for dom4j ");//加入一行注释 Element bookElement=booksElement.addElement("book");//添加一个book节点 bookElement.addAttribute("show","yes");//添加属性内容 Element titleElement=bookElement.addElement("title");//添加文本节点 titleElement.setText("ajax in action");//添加文本内容 try{ XMLWriter writer=new XMLWriter(new FileWriter(new File(fileName))); writer.write(document); writer.close(); }catch(Exception e){ e.printStackTrace(); } } /** * 修改XML文件 */ public static void modifyXMLFile() { String oldStr = "test.xml"; String newStr = "test1.xml"; Document document = null; //修改节点的属性 try { SAXReader saxReader = new SAXReader(); // 用来读取xml文档 document = saxReader.read(new File(oldStr)); // 读取xml文档 List list = document.selectNodes("/books/book/@show");// 用xpath查找节点book的属性 Iterator iter = list.iterator(); while (iter.hasNext()) { Attribute attribute = (Attribute) iter.next(); if (attribute.getValue().equals("yes")) attribute.setValue("no"); } } catch (Exception e) { 
e.printStackTrace(); } //修改节点的内容 try { SAXReader saxReader = new SAXReader(); // 用来读取xml文档 document = saxReader.read(new File(oldStr)); // 读取xml文档 List list = document.selectNodes("/books/book/title");// 用xpath查找节点book的内容 Iterator iter = list.iterator(); while (iter.hasNext()) { Element element = (Element) iter.next(); element.setText("xxx");// 设置相应的内容 } } catch (Exception e) { e.printStackTrace(); } try { XMLWriter writer = new XMLWriter(new FileWriter(new File(newStr))); writer.write(document); writer.close(); } catch (Exception ex) { ex.printStackTrace(); } } public static void main(String[] args){ Document doc=load("student.xml"); Element root=getRootElement(doc); read(root); write("test.xml"); modifyXMLFile(); } }
XPath 是一门在 XML 文档中查找信息的语言, 可用来在 XML 文档中对元素和属性进行遍历。XPath 是 W3C XSLT 标准的主要元素,并且 XQuery 和 XPointer 同时被构建于 XPath 表达之上。因此,对 XPath 的理解是很多高级 XML 应用的基础。
XPath非常类似对数据库操作的SQL语言,或者说JQuery,它可以方便开发者抓取文档中需要的东西。(dom4j也支持xpath, dom4j使用xpath请点击这里)
TestXPath.java
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Queries a JAXP DOM document with XPath.
 *
 * @author licheng
 */
public class TestXPath {

    /** Name of the sample document, looked up through the class loader. */
    private static final String SOURCE_XML = "university.xml";

    public static void main(String[] args) {
        read();
    }

    /**
     * Evaluates {@code //class/@name} against the sample document and prints each
     * match as {@code name = value} to {@code System.out}. XPath, parser, SAX and I/O
     * failures (including a missing resource) are reported with a stack trace.
     */
    public static void read() {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            InputStream in = TestXPath.class.getClassLoader().getResourceAsStream(SOURCE_XML);
            if (in == null) {
                // Passing null on to parse() would raise an unchecked IllegalArgumentException.
                throw new FileNotFoundException(SOURCE_XML + " not found on the classpath");
            }
            Document doc;
            try {
                doc = builder.parse(in);
            } finally {
                in.close();
            }

            XPath xpath = XPathFactory.newInstance().newXPath();
            // Selects the name attribute of every class element.
            // XPath syntax introduction: http://w3school.com.cn/xpath/
            XPathExpression expr = xpath.compile("//class/@name");
            NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
            for (int i = 0; i < nodes.getLength(); i++) {
                System.out.println("name = " + nodes.item(i).getNodeValue());
            }
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
不知为何,博客园每天只能发表一篇随笔,发第二篇的时候不能在首页显示。
利用Java将XML文件导入数据库,以及将数据库信息导入到XML的笔记,将在明后两天发布。
最后将分享一个利用XML当数据库,查询英语六级词汇的案例,下面先分享一下截图吧:
实例将在明后两天分享,请多多关注哟。