zoukankan      html  css  js  c++  java
  • XmlAnalyzer1.00 源码

    此工程用途:将xml同级属性/子节点按字母序排列重新输出.

    源码下载: https://files.cnblogs.com/files/heyang78/XmlAnalyzer-20200526-1.zip

    核心类:

    Token,此类用于将XML文件中的文本分类:

    package com.heyang;
    
    /**
     * A single lexical unit cut out of XML text: its literal text, a numeric
     * category (one of the {@code TYPE_*} constants) and a position marker
     * that is assigned from outside through {@link #setIndex(int)}.
     */
    public class Token {
        public static final int TYPE_OPEN_ANGLEBRACKET = 0;  // <
        public static final int TYPE_CLOSE_ANGLEBRACKET = 1; // >
        public static final int TYPE_slant = 2;              // /
        public static final int TYPE_TEXT = 3;               // text
        public static final int TYPE_EQUAL = 4;              // =
        public static final int TYPE_EMPTY_CLOSE = 5;        // />
        public static final int TYPE_END_OPEN = 6;           // </

        /** Literal characters of this token. */
        private String text;
        /** Category of this token, one of the TYPE_* constants. */
        private int type;
        /** Used to remember location; stays 0 until setIndex is called. */
        private int index;

        /**
         * Creates a token from a single character.
         *
         * @param c    the character that forms the token text
         * @param type one of the TYPE_* constants
         */
        public Token(char c, int type) {
            this(String.valueOf(c), type);
        }

        /**
         * Creates a token from a whole word.
         *
         * @param word the token text
         * @param type one of the TYPE_* constants
         */
        public Token(String word, int type) {
            this.type = type;
            this.text = word;
        }

        /** @return a debug rendering of the form token(text=..,type=..,index=..) */
        @Override
        public String toString() {
            final String pattern = "token(text=%s,type=%d,index=%d)";
            return String.format(pattern, text, type, index);
        }

        /** @return the category of this token */
        public int getType() {
            return this.type;
        }

        /** @param type the new category, one of the TYPE_* constants */
        public void setType(int type) {
            this.type = type;
        }

        /** @return the literal text of this token */
        public String getText() {
            return this.text;
        }

        /** @param text the new literal text */
        public void setText(String text) {
            this.text = text;
        }

        /** @return the position marker of this token */
        public int getIndex() {
            return this.index;
        }

        /** @param index the position marker to remember */
        public void setIndex(int index) {
            this.index = index;
        }
    }

    Lexer,此类用于分词:

    package com.heyang;
    
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.commons.lang.StringUtils;
    
    /**
     * Splits XML text into a flat list of {@link Token}s.
     *
     * <p>Recognised tokens are {@code <}, {@code </}, {@code >}, {@code />},
     * {@code =}, a lone {@code /}, double-quoted strings (kept with their
     * quotes, whitespace included) and unquoted words. Whitespace outside
     * quotes only separates words and produces no token.
     */
    public class Lexer {
        private List<Token> tokens;

        /**
         * Tokenizes the given text and numbers the resulting tokens from 1.
         *
         * @param inputTxt the XML text to scan; must not be null
         * @throws IllegalArgumentException if a double quote is opened but
         *         never closed
         */
        public Lexer(String inputTxt) {
            tokens = new ArrayList<Token>();

            // Characters of the unquoted word currently being read.
            StringBuilder bundle = new StringBuilder();
            final int length = inputTxt.length();

            for (int i = 0; i < length; i++) {
                char c = inputTxt.charAt(i);
                boolean hasNext = i + 1 < length;

                if (Character.isWhitespace(c)) {
                    flushBundle(bundle);
                } else if (c == '<') {
                    // Always emit pending text first so that "text<b/>" does
                    // not glue "text" onto the following tag name.
                    flushBundle(bundle);
                    if (hasNext && inputTxt.charAt(i + 1) == '/') {
                        tokens.add(new Token("</", Token.TYPE_END_OPEN));
                        i++; // the '/' has been consumed as part of "</"
                    } else {
                        tokens.add(new Token(c, Token.TYPE_OPEN_ANGLEBRACKET));
                    }
                } else if (c == '>') {
                    flushBundle(bundle);
                    tokens.add(new Token(c, Token.TYPE_CLOSE_ANGLEBRACKET));
                } else if (c == '=') {
                    flushBundle(bundle);
                    tokens.add(new Token(c, Token.TYPE_EQUAL));
                } else if (c == '/') {
                    // Flush in both branches to keep tokens in source order.
                    flushBundle(bundle);
                    if (hasNext && inputTxt.charAt(i + 1) == '>') {
                        tokens.add(new Token("/>", Token.TYPE_EMPTY_CLOSE));
                        i++; // the '>' has been consumed as part of "/>"
                    } else {
                        tokens.add(new Token(c, Token.TYPE_slant));
                    }
                } else if (c == '"') {
                    flushBundle(bundle);
                    int closing = inputTxt.indexOf('"', i + 1);
                    if (closing < 0) {
                        // Previously this surfaced as an opaque
                        // StringIndexOutOfBoundsException from substring().
                        throw new IllegalArgumentException(
                                "Unterminated quoted string starting at offset " + i);
                    }
                    // The token text keeps both surrounding quotes.
                    tokens.add(new Token(inputTxt.substring(i, closing + 1), Token.TYPE_TEXT));
                    i = closing;
                } else {
                    bundle.append(c);
                }
            }

            // A word that runs up to the end of the input must not be lost.
            flushBundle(bundle);

            setTokenIndexes();
        }

        /**
         * Emits the accumulated word, if any, as a text token and clears the
         * buffer.
         */
        private void flushBundle(StringBuilder bundle) {
            if (addText2Tokens(bundle.toString())) {
                bundle.setLength(0);
            }
        }

        /**
         * Appends a text token for a non-empty string.
         *
         * @return true if a token was added, false for null or empty text
         */
        private boolean addText2Tokens(String text) {
            if (text == null || text.isEmpty()) {
                return false;
            }
            tokens.add(new Token(text, Token.TYPE_TEXT));
            return true;
        }

        /** Numbers all tokens consecutively, starting at 1. */
        public void setTokenIndexes() {
            int idx = 0;
            for (Token t : tokens) {
                idx++;
                t.setIndex(idx);
            }
        }

        /**
         * Renumbers the tokens and prints one "#index text" line per token to
         * standard output.
         */
        public void printTokens() {
            setTokenIndexes();
            for (Token t : tokens) {
                System.out.println("#" + t.getIndex() + " " + t.getText());
            }
        }

        /**
         * @return the text of every token, each followed by a single space
         */
        public String getCompactJsonTxt() {
            StringBuilder sb = new StringBuilder();
            for (Token t : tokens) {
                sb.append(t.getText()).append(' ');
            }
            return sb.toString();
        }

        /** @return the live token list produced by the constructor */
        public List<Token> getTokens() {
            return tokens;
        }
    }

    Node,此类代表一个xml节点:

    package com.heyang;
    
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    
    public class Node implements Comparable<Node>{
        private String text;
        private String name;
        private List<Node> children;
        private List<Property> proterties;
        private int depth=0;
        
        public int compareTo(Node another) {
            return this.name.compareTo(another.name);
        }
        
        public void addChild(Node n) {
            if(children==null) {
                children=new ArrayList<Node>();
            }
            
            children.add(n);
            adjustDepth();
        }
        
        private void adjustDepth() {
            if(children==null) {
                return;
            }
            for(Node json:children) {
                json.depth=this.depth+1;
                json.adjustDepth();
            }
        }
        
        public void addProperty(Property p) {
            if(proterties==null) {
                proterties=new ArrayList<Property>();
            }
            
            proterties.add(p);
        }
        
        public String toString() {
            String tabs=getIndentSpace();
            
            StringBuilder sb=new StringBuilder();
            sb.append(tabs);
            
            sb.append("<"+name);
            if(proterties!=null) {
                Collections.sort(proterties);
                
                for(Property p:proterties) {
                    sb.append(" "+p.getName()+"="+p.getValue());
                }
            }
            
            if(text==null && children==null) {
                sb.append("/>");
                return sb.toString();
            }else {
                sb.append(">");
            }
            
            if(text!=null) {
                sb.append(text);
            }
            
            if(children!=null) {
                
                
                Collections.sort(children);
                for(Node child:children) {
                    sb.append("
    ");
                    sb.append(child);
                }
            }
            
            if(children!=null) {
                sb.append("
    "+tabs+"</"+name+">");
            }else {
                sb.append("</"+name+">");
            }
            
            return sb.toString();
        }
        
        private String getIndentSpace() {
            return String.join("", Collections.nCopies(this.depth, "    "));
        }
        
        public String getText() {
            return text;
        }
        public void setText(String text) {
            this.text = text;
        }
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public List<Node> getChildren() {
            return children;
        }
        public void setChildren(List<Node> children) {
            this.children = children;
        }
        public List<Property> getProterties() {
            return proterties;
        }
        public void setProterties(List<Property> proterties) {
            this.proterties = proterties;
        }
    }

    Property,此类代表XML的属性:

    package com.heyang;
    
    public class Property implements Comparable<Property>{
        private String name;
        private String value;
        
         public int compareTo(Property another) {
            return this.name.compareTo(another.name);
        }
        
        public Property(String name,String value) {
            this.name=name;
            this.value=value;
        }
        
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public String getValue() {
            return value;
        }
        public void setValue(String value) {
            this.value = value;
        }
    }

    TreeBuilder,此类用于自顶向下构建一棵树:

    package com.heyang;
    
    import java.util.List;
    
    /**
     * Builds a {@link Node} tree from a token list by recursive descent,
     * starting at the first token and working top-down.
     */
    public class TreeBuilder {
        private Node root;
        private List<Token> tokens;
        /** Position of the next token to hand out. */
        private int tokenIdx;

        /**
         * Parses the tokens into a tree whose root is available through
         * {@link #getRoot()}.
         *
         * @param tokens the token list to parse
         * @throws Exception if the tokens do not form a well-shaped element,
         *         including when the list ends too early
         */
        public TreeBuilder(List<Token> tokens) throws Exception{
            this.tokens=tokens;
            this.tokenIdx=0;

            root=new Node();
            parseNode(root);
        }

        /**
         * Parses one element, from its opening angle bracket to the end of
         * its closing tag, and stores the result in {@code parent}.
         *
         * @param parent the node to fill with name, attributes, text and children
         * @throws Exception on any unexpected token or premature end of input
         */
        private void parseNode(Node parent) throws Exception{
            expect(Token.TYPE_OPEN_ANGLEBRACKET, "'<'");

            // Element name.
            Token nameToken=expect(Token.TYPE_TEXT, "text");
            parent.setName(nameToken.getText());

            // Attributes: repeated "name = value" triples.
            for(;;) {
                Token attrName=fetchToken();
                if(attrName.getType()!=Token.TYPE_TEXT) {
                    // Not an attribute: push the token back and stop.
                    returnToken();
                    break;
                }

                expect(Token.TYPE_EQUAL, "=");
                Token attrValue=expect(Token.TYPE_TEXT, "text");

                parent.addProperty(new Property(attrName.getText(), attrValue.getText()));
            }

            Token tagEnd=fetchToken();
            if(tagEnd.getType()==Token.TYPE_EMPTY_CLOSE) {
                // Self-closing element: no children and no text.
                return;
            }
            if(tagEnd.getType()!=Token.TYPE_CLOSE_ANGLEBRACKET) {
                // The opening tag was not terminated properly.
                throw new Exception("Expected:'>' actual:"+tagEnd.getText()+" "+tagEnd);
            }

            // Element content: text and/or child elements.
            for(;;) {
                Token content=fetchToken();

                if(content.getType()==Token.TYPE_TEXT) {
                    // A later text token replaces an earlier one.
                    parent.setText(content.getText());
                }else if(content.getType()==Token.TYPE_OPEN_ANGLEBRACKET)  {
                    // Attach the child first, then push '<' back and recurse
                    // so the child parses its own opening bracket.
                    Node child=new Node();
                    parent.addChild(child);

                    returnToken();
                    parseNode(child);
                }else {
                    // Neither text nor a child: push the token back and stop.
                    returnToken();
                    break;
                }
            }

            // Closing tag; its name has to match the opening tag.
            expect(Token.TYPE_END_OPEN, "'</'");

            Token closingName=expect(Token.TYPE_TEXT, "text");
            if(!closingName.getText().equals(parent.getName())) {
                throw new Exception("Expected node name:"+parent.getName()+" actual:"+closingName.getText()+" "+closingName);
            }

            expect(Token.TYPE_CLOSE_ANGLEBRACKET, "'>'");
        }

        /**
         * Consumes the next token and checks its type.
         *
         * @param type     the required token type
         * @param expected how to describe the required token in the error message
         * @return the consumed token
         * @throws Exception if the token has another type or input has ended
         */
        private Token expect(int type, String expected) throws Exception {
            Token token=fetchToken();
            if(token.getType()!=type) {
                throw new Exception("Expected:"+expected+" actual:"+token.getText()+" "+token);
            }
            return token;
        }

        /**
         * Hands out the next token and advances the cursor.
         *
         * @return the next token, never null
         * @throws Exception if all tokens have already been consumed
         */
        private Token fetchToken() throws Exception {
            if(tokenIdx>=tokens.size()) {
                // Fail with a parse error rather than returning null, which
                // every caller would dereference.
                throw new Exception("Unexpected end of input after "+tokens.size()+" token(s)");
            }
            Token t=tokens.get(tokenIdx);
            tokenIdx++;
            return t;
        }

        /** Pushes the most recently fetched token back onto the input. */
        private void returnToken() {
            if(tokenIdx>0) {
                tokenIdx--;
            }
        }

        /** @return the root node produced by the constructor */
        public Node getRoot() {
            return root;
        }
    }

    最后整合调用:

    package com.heyang;
    
    import com.heyang.util.BracketChecker;
    import com.heyang.util.CommonUtil;
    import com.heyang.util.Renderer;
    
    public class EntryPoint {
        public static void main(String[] args) {
            try {
                // Read context from file
                String jsonTxt=CommonUtil.readTextFromFile("C:\hy\files\xml\01.xml");
                System.out.println("原文="+jsonTxt);
                
                // Is brackets balanced
                BracketChecker checker=new BracketChecker();
                boolean isBalanced=checker.isBalanced(jsonTxt);
                if(isBalanced==false) {
                    System.out.println(Renderer.paintBrown(checker.getErrMsg()));
                    return;
                }
                
                // Parse json to tokens
                Lexer lex=new Lexer(jsonTxt);
                //System.out.println("紧缩文本="+lex.getCompactJsonTxt());
                //lex.printTokens();
                
                // Build tree
                TreeBuilder builder=new TreeBuilder(lex.getTokens());
                Node root=builder.getRoot();
                System.out.println("整形后文本:
    "+root);
            }catch(Exception ex) {
                System.out.println(Renderer.paintBrown(ex.getMessage()));
                ex.printStackTrace();
            }
        }
    }

    整形效果:

    原文=<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">  <modelVersion>4.0.0</modelVersion>  <groupId>com.heyang</groupId>  <artifactId>XmlAnalyzer</artifactId>  <version>1.00</version>    <dependencies>        <dependency>            <groupId>ch.qos.logback</groupId>            <artifactId>logback-classic</artifactId>            <version>1.1.11</version>        </dependency>        <dependency>            <groupId>ch.qos.logback</groupId>            <artifactId>logback-core</artifactId>            <version>1.1.11</version>        </dependency>                <dependency>            <groupId>commons-lang</groupId>            <artifactId>commons-lang</artifactId>            <version>2.6</version>        </dependency>    </dependencies></project>
    整形后文本:
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
        <artifactId>XmlAnalyzer</artifactId>
        <dependencies>
            <dependency>
                <artifactId>logback-classic</artifactId>
                <groupId>ch.qos.logback</groupId>
                <version>1.1.11</version>
            </dependency>
            <dependency>
                <artifactId>logback-core</artifactId>
                <groupId>ch.qos.logback</groupId>
                <version>1.1.11</version>
            </dependency>
            <dependency>
                <artifactId>commons-lang</artifactId>
                <groupId>commons-lang</groupId>
                <version>2.6</version>
            </dependency>
        </dependencies>
        <groupId>com.heyang</groupId>
        <modelVersion>4.0.0</modelVersion>
        <version>1.00</version>
    </project>

    2020-5-22 解析算术表达式

    2020-5-25 解析Json

    2020-5-26 解析XML

    感觉编译器/解释器的路越走越宽了.

    --2020年5月26日--

  • 相关阅读:
    MVVM
    vue-cli初始化项目2.x|3.x
    逻辑覆盖
    white box白盒测试
    black box黑盒测试
    总结回顾js arr的常见方法以及相关的使用场景(一)
    js 原生功底 (一)
    markdown 语法总结(一)
    阿里一面,面试官想看到的究竟是什么,带你揭秘!!!!
    关于Axios 源码你想了解的 在这儿
  • 原文地址:https://www.cnblogs.com/heyang78/p/12964733.html
Copyright © 2011-2022 走看看