zoukankan      html  css  js  c++  java
  • XML的两种解析方式

    JDK提供的XML解析方式分为两种:DOM方式和SAX方式
    DOM:Document Object Model。需要读取整个XML文档,先在内存中构建代表整个DOM树的Document对象,之后可以对节点进行随机访问。需要考虑内存占用,适合增删改。
    SAX:Simple API for XML。采用事件驱动的方式解析XML文件,边读边对文档进行处理.适合读取

    其他的xml解析包:Dom4J, PullParser(安卓)

    Dom4J底层使用SAX解析器读取文档, 但会在内存中构建文档树, 其API类似DOM方式

    DOM @JDK

    /**
     * JUnit examples that read and modify {@code src/book.xml} with the JDK DOM API.
     *
     * <p>Every test parses the file into a {@link Document}; the tests that change the
     * tree write it back to the same file afterwards.
     */
    public class DomTest {

        /**
         * Parses {@code src/book.xml} and returns the resulting DOM document.
         *
         * @return the parsed document
         * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
         * @throws SAXException if the XML cannot be parsed
         * @throws IOException if the file cannot be read
         */
        private Document getDocument() throws ParserConfigurationException,
                SAXException, IOException {
            // 1. Obtain the factory.
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // 2. Obtain a builder from the factory.
            DocumentBuilder builder = factory.newDocumentBuilder();
            // 3. Parse the XML file into a document object.
            return builder.parse("src/book.xml");
        }

        /**
         * Writes the in-memory document back to {@code src/book.xml}.
         *
         * @param document the DOM tree to serialize
         * @throws TransformerFactoryConfigurationError if no transformer factory is available
         * @throws TransformerConfigurationException if a transformer cannot be created
         * @throws TransformerException if serialization fails
         */
        private void writeBack2Xml(Document document)
                throws TransformerFactoryConfigurationError,
                TransformerConfigurationException, TransformerException {

            TransformerFactory factory = TransformerFactory.newInstance();
            // A transformer created without a stylesheet copies source to result unchanged.
            Transformer transformer = factory.newTransformer();
            transformer.transform(new DOMSource(document), new StreamResult("src/book.xml"));
        }

        /** Prints the number of matching nodes and the text content of the first one. */
        @Test
        public void testReadContent() throws Exception {
            Document document = getDocument();
            // Look up the node list by tag name.
            NodeList nodeList = document.getElementsByTagName("书");
            System.out.println("长度 : " + nodeList.getLength());
            // item(0) is the first match (null when there is none).
            Node firstNode = nodeList.item(0);
            String result = firstNode.getTextContent();
            System.out.println(result);
        }

        /** Prints an attribute value of the first matching element. */
        @Test
        public void testReadAttribute() throws Exception {
            Document document = getDocument();
            NodeList nodeList = document.getElementsByTagName("书");
            // Only cast after confirming the node really is an element.
            Node node = nodeList.item(0);
            if (node instanceof Element) {
                Element firstElement = (Element) node;
                String result = firstElement.getAttribute("出版社");
                System.out.println(result);
            }
        }

        /** Appends a new price element to the first matching element and saves the file. */
        @Test
        public void testAddPrice() throws Exception {
            Document document = getDocument();
            Node firstNode = document.getElementsByTagName("书").item(0);
            Element newElement = document.createElement("售价");
            newElement.setTextContent("79.00元");
            firstNode.appendChild(newElement);
            writeBack2Xml(document);  // write back to disk
        }

        /** Removes every price element whose text equals the target value and saves the file. */
        @Test
        public void testDelete() throws Exception {
            Document document = getDocument();
            NodeList priceNodeList = document.getElementsByTagName("售价");
            // The returned NodeList is live: removing a node shifts the following items
            // down by one. Walking backwards keeps every not-yet-visited index stable,
            // so consecutive matches are no longer skipped.
            for (int i = priceNodeList.getLength() - 1; i >= 0; i--) {
                Node node = priceNodeList.item(i);
                if ("39.00元".equals(node.getTextContent())) {
                    // Remove the child through its parent node.
                    node.getParentNode().removeChild(node);
                }
            }
            writeBack2Xml(document);
        }

        /** Prints the name of every element node in the document. */
        @Test
        public void testPrintAllElementsName() throws Exception {
            Document document = getDocument();
            // Recursive print starting from the document node.
            printAllElementsName(document);
        }

        /**
         * Prints the name of {@code node} if it is an element, then recurses into its children.
         *
         * @param node the subtree root to print
         */
        public void printAllElementsName(Node node) {
            // Print this node when it is an element.
            if (Node.ELEMENT_NODE == node.getNodeType()) {
                System.out.println(node.getNodeName());
            }
            // Visit the children; the recursion ends when a node has no more children.
            NodeList childNodes = node.getChildNodes();
            for (int i = 0; i < childNodes.getLength(); i++) {
                Node item = childNodes.item(i);
                printAllElementsName(item);
            }
        }
    }

    SAX @JDK

    /**
     * Entry point that parses {@code src/book.xml} with the JDK SAX parser and
     * forwards the parse events to a {@code MyDefaultHandler}.
     */
    public class SaxTest {

        /**
         * Creates an XML reader, installs the content handler and parses the file.
         *
         * @param args unused
         */
        public static void main(String[] args) throws Exception, SAXException {
            // factory -> parser -> reader, obtained in a single chain
            XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

            MyDefaultHandler handler = new MyDefaultHandler();
            xmlReader.setContentHandler(handler);

            xmlReader.parse("src/book.xml");
        }
    }
    
    /**
     * SAX handler that prints the text of one specific price element.
     *
     * <p>The parser drives the flow, so the handler tracks its position with a flag and a
     * counter. {@code count} starts at 1 and is incremented on every price start tag, so
     * {@code count == 3} selects the second price element encountered.
     */
    class MyDefaultHandler extends DefaultHandler {

        // True while the parser is between a price start tag and its end tag.
        private boolean isPrice = false;
        // Starts at 1 and grows by one per price start tag.
        private int count = 1;
        // Collects the text of the current price element across characters() calls.
        private final StringBuilder priceText = new StringBuilder();

        /** Marks entry into a price element and resets the text buffer. */
        @Override
        public void startElement(String uri, String localName, String qName,    // qName is the tag name
                Attributes attributes) throws SAXException {
            if ("售价".equals(qName)) {
                isPrice = true;
                count++;
                priceText.setLength(0);
            }
        }

        /** Prints the buffered text once when the selected price element ends. */
        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            if ("售价".equals(qName)) {
                // Emit only when some text was actually collected for the selected element.
                if (isPrice && count == 3 && priceText.length() > 0) {
                    System.out.println("文本: " + priceText);
                }
                isPrice = false;
            }
        }

        /**
         * Buffers text belonging to the selected price element.
         *
         * <p>A parser may deliver one run of text in several chunks, so the text is
         * accumulated here and printed in {@link #endElement} instead of per chunk.
         */
        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            if (isPrice && count == 3) {
                priceText.append(ch, start, length);
            }
        }
    }

    Dom4J

    /**
     * JUnit examples that read and modify {@code src/book.xml} with dom4j.
     *
     * <p>Every test loads the file into a dom4j {@code Document}; the tests that change
     * the tree write it back to the same file afterwards.
     */
    public class Dom4JTest {

        /**
         * Reads {@code src/book.xml} with a {@code SAXReader}.
         *
         * @return the document object representing the XML file
         * @throws DocumentException if the file cannot be read or parsed
         */
        private Document getDocument() throws DocumentException {
            SAXReader reader = new SAXReader();
            return reader.read("src/book.xml");
        }

        /**
         * Pretty-prints the document back to {@code src/book.xml} using UTF-8.
         *
         * @param document the tree to serialize
         * @throws IOException if the file cannot be opened or written
         */
        private void writeBack2Xml(Document document) throws IOException {
            OutputFormat format = OutputFormat.createPrettyPrint();
            format.setEncoding("UTF-8");
            FileOutputStream out = new FileOutputStream("src/book.xml");
            try {
                XMLWriter writer = new XMLWriter(out, format);
                writer.write(document);
                writer.close();
            } finally {
                // Release the file handle even when creating the writer or writing fails.
                out.close();
            }
        }

        /**
         * Prints the text of a nested element, navigating one level at a time.
         *
         * <p>NOTE(review): the original note said dom4j cannot reach a deep node directly
         * because it parses via SAX. dom4j also documents XPath-based lookup
         * (e.g. {@code selectSingleNode}); confirm before relying on that statement.
         */
        @Test
        public void testReadContent() throws Exception {

            Document document = getDocument();

            Element rootElement = document.getRootElement();
            Element firstLevelElement = rootElement.element("ele1");
            Element secondLevelElement = firstLevelElement.element("ele2");

            String value = secondLevelElement.getText();
            System.out.println(value);
        }

        /** Prints an attribute value of the second matching child of the root element. */
        @Test
        public void testReadAttribute() throws Exception {

            Document document = getDocument();

            Element rootElement = document.getRootElement();

            List<Element> list = rootElement.elements("书");
            // Index 1 selects the second matching child.
            Element secondElement = list.get(1);
            Attribute attribute = secondElement.attribute("出版社");

            String value = attribute.getValue();
            System.out.println(value);
        }

        /** Adds a new child element to the second matching element and saves the file. */
        @Test
        public void testAddPrice() throws Exception {

            Document document = getDocument();

            Element rootElement = document.getRootElement();
            Element secondBookElement = (Element) rootElement.elements("书").get(1);
            // Create the new element and set its text in one chain.
            secondBookElement.addElement("newEle").setText("this is new Element");

            writeBack2Xml(document);
        }

        /** Removes the first price child of the second matching element and saves the file. */
        @Test
        public void testDeletePrice() throws Exception {

            Document document = getDocument();
            Element rootElement = document.getRootElement();
            Element secondBookElement = (Element) rootElement.elements("书").get(1);
            Element targetBookPrice = (Element) secondBookElement.elements("售价").get(0);

            // Get the parent, then remove this child from it.
            targetBookPrice.getParent().remove(targetBookPrice);
            writeBack2Xml(document);
        }
    }
  • 相关阅读:
    购买成熟软件产品后的二次开发的问题
    outlook2010如何导入csv的通讯录?
    导入Excel数据时对数据校验提示方法
    系统开发中存储过程使用的优势和劣势
    FCKeditor.Net_2.5的使用
    [正则表达式]如何高亮显示搜索关键字
    国外网站模板网址集锦
    _NET 下 FCKeditor_2_5_1上传图片的配置
    用属性模拟多继承机制
    FCKeditor 2.6在ASP.NET中的配置方法
  • 原文地址:https://www.cnblogs.com/myJavaEE/p/6685361.html
Copyright © 2011-2022 走看看