此工程用途:将xml同级属性/子节点按字母序排列重新输出.
源码下载: https://files.cnblogs.com/files/heyang78/XmlAnalyzer-20200526-1.zip
核心类:
Token,此类用于将XML文件中的文本分类:
package com.heyang; public class Token { public final static int TYPE_OPEN_ANGLEBRACKET=0; // < public final static int TYPE_CLOSE_ANGLEBRACKET=1;// > public final static int TYPE_slant =2; // / public final static int TYPE_TEXT=3; // text public final static int TYPE_EQUAL =4; // = public final static int TYPE_EMPTY_CLOSE =5; // /> public final static int TYPE_END_OPEN =6; // </ private int type; private String text; private int index;// Used to remember location public Token(char c,int type) { this.text=String.valueOf(c); this.type=type; } public Token(String word,int type) { this.text=word; this.type=type; } public String toString() { return String.format("token(text=%s,type=%d,index=%d)", text,type,index); } public int getType() { return type; } public void setType(int type) { this.type = type; } public String getText() { return text; } public void setText(String text) { this.text = text; } public int getIndex() { return index; } public void setIndex(int index) { this.index = index; } }
Lexer,此类用于分词:
package com.heyang;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;

/**
 * Splits XML text into a flat list of {@link Token}s.
 *
 * <p>Whitespace separates text tokens and is otherwise discarded. The characters
 * {@code <}, {@code >}, {@code =}, {@code /} and the pairs {@code </} and {@code />}
 * become their own tokens. A double-quoted run (quotes included) becomes a single
 * text token, so whitespace and punctuation inside quotes are preserved.
 */
public class Lexer {
    private final List<Token> tokens;

    /**
     * Tokenizes the given text and numbers the resulting tokens starting at 1.
     *
     * @param inputTxt the XML text to tokenize
     */
    public Lexer(String inputTxt) {
        tokens = new ArrayList<Token>();
        StringBuilder bundle = new StringBuilder(); // text accumulated since the last delimiter
        final int length = inputTxt.length();

        for (int i = 0; i < length; i++) {
            char c = inputTxt.charAt(i);
            // One-character lookahead; '\0' stands for "no next character".
            char next = (i + 1 < length) ? inputTxt.charAt(i + 1) : '\0';

            if (Character.isWhitespace(c)) {
                flushBundle(bundle);
            } else if (c == '<') {
                // Flush first so pending text is never glued onto the following tag name.
                flushBundle(bundle);
                if (next == '/') {
                    tokens.add(new Token("</", Token.TYPE_END_OPEN));
                    i++;
                } else {
                    tokens.add(new Token(c, Token.TYPE_OPEN_ANGLEBRACKET));
                }
            } else if (c == '>') {
                flushBundle(bundle);
                tokens.add(new Token(c, Token.TYPE_CLOSE_ANGLEBRACKET));
            } else if (c == '=') {
                flushBundle(bundle);
                tokens.add(new Token(c, Token.TYPE_EQUAL));
            } else if (c == '/') {
                // Flush first so tokens are emitted in source order.
                flushBundle(bundle);
                if (next == '>') {
                    tokens.add(new Token("/>", Token.TYPE_EMPTY_CLOSE));
                    i++;
                } else {
                    tokens.add(new Token(c, Token.TYPE_slant));
                }
            } else if (c == '"') {
                flushBundle(bundle);
                int close = inputTxt.indexOf('"', i + 1);
                // An unterminated quote takes the rest of the input instead of
                // running past the end of the string.
                int end = (close >= 0) ? close + 1 : length;
                tokens.add(new Token(inputTxt.substring(i, end), Token.TYPE_TEXT));
                i = end - 1;
            } else {
                bundle.append(c);
            }
        }

        // Text that runs up to the end of the input must not be lost.
        flushBundle(bundle);
        setTokenIndexes();
    }

    /** Emits the accumulated text (if any) as a text token and clears the buffer. */
    private void flushBundle(StringBuilder bundle) {
        addText2Tokens(bundle.toString());
        bundle.setLength(0);
    }

    /**
     * Appends a text token when {@code text} is non-empty.
     *
     * @return {@code true} if a token was added, {@code false} otherwise
     */
    private boolean addText2Tokens(String text) {
        if (StringUtils.isNotEmpty(text)) {
            tokens.add(new Token(text, Token.TYPE_TEXT));
            return true;
        }
        return false;
    }

    /** Assigns each token its 1-based position in the token list. */
    public void setTokenIndexes() {
        int idx = 0;
        for (Token t : tokens) {
            idx++;
            t.setIndex(idx);
        }
    }

    /** Prints every token as {@code #index text}, (re)assigning 1-based indexes on the way. */
    public void printTokens() {
        int idx = 0;
        for (Token t : tokens) {
            idx++;
            t.setIndex(idx);
            System.out.println("#" + idx + " " + t.getText());
        }
    }

    /** Returns all token texts joined in order, each followed by a single space. */
    public String getCompactJsonTxt() {
        StringBuilder sb = new StringBuilder();
        for (Token t : tokens) {
            sb.append(t.getText()).append(' ');
        }
        return sb.toString();
    }

    public List<Token> getTokens() {
        return tokens;
    }
}
Node,此类代表一个xml节点:
package com.heyang;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * One XML element: a name, optional text, optional properties (attributes)
 * and optional child elements.
 *
 * <p>Nodes order by name. {@link #toString()} renders the element with its
 * properties and children sorted.
 */
public class Node implements Comparable<Node> {
    private String text;
    private String name;
    private List<Node> children;
    private List<Property> proterties;
    /** Nesting level below the root; drives the indentation in {@link #toString()}. */
    private int depth = 0;

    /** Orders nodes by name. */
    @Override
    public int compareTo(Node another) {
        return this.name.compareTo(another.name);
    }

    /**
     * Appends a child and recomputes the depth of the whole subtree below this node.
     *
     * @param n the child to add
     */
    public void addChild(Node n) {
        if (children == null) {
            children = new ArrayList<Node>();
        }
        children.add(n);
        adjustDepth();
    }

    /** Sets every descendant's depth relative to this node's depth. */
    private void adjustDepth() {
        if (children == null) {
            return;
        }
        for (Node child : children) {
            child.depth = this.depth + 1;
            child.adjustDepth();
        }
    }

    /**
     * Appends a property.
     *
     * @param p the property to add
     */
    public void addProperty(Property p) {
        if (proterties == null) {
            proterties = new ArrayList<Property>();
        }
        proterties.add(p);
    }

    /**
     * Renders this element and its subtree.
     *
     * <p>Note: the property list and the child list are sorted in place, so the
     * order seen through the getters changes after this call.
     */
    @Override
    public String toString() {
        String tabs = getIndentSpace();
        boolean hasChildren = children != null && !children.isEmpty();

        StringBuilder sb = new StringBuilder();
        sb.append(tabs).append("<").append(name);

        if (proterties != null) {
            Collections.sort(proterties);
            for (Property p : proterties) {
                sb.append(" ").append(p.getName()).append("=").append(p.getValue());
            }
        }

        // Nothing between the tags: use the self-closing form.
        if (text == null && !hasChildren) {
            sb.append("/>");
            return sb.toString();
        }
        sb.append(">");

        if (text != null) {
            sb.append(text);
        }

        if (hasChildren) {
            Collections.sort(children);
            for (Node child : children) {
                sb.append(" ");
                sb.append(child);
            }
            sb.append(" ").append(tabs);
        }
        sb.append("</").append(name).append(">");

        return sb.toString();
    }

    /** Builds the indentation prefix: one unit per depth level. */
    private String getIndentSpace() {
        return String.join("", Collections.nCopies(this.depth, " "));
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public List<Node> getChildren() {
        return children;
    }

    /**
     * Replaces the child list and recomputes the depth of the new subtree,
     * matching what {@link #addChild(Node)} does.
     */
    public void setChildren(List<Node> children) {
        this.children = children;
        adjustDepth();
    }

    public List<Property> getProterties() {
        return proterties;
    }

    public void setProterties(List<Property> proterties) {
        this.proterties = proterties;
    }
}
Property,此类代表xml的属性:
package com.heyang; public class Property implements Comparable<Property>{ private String name; private String value; public int compareTo(Property another) { return this.name.compareTo(another.name); } public Property(String name,String value) { this.name=name; this.value=value; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getValue() { return value; } public void setValue(String value) { this.value = value; } }
TreeBuilder,此类用于自顶向下构建一棵树:
package com.heyang;

import java.util.List;

/**
 * Builds a {@link Node} tree top-down from a token list by recursive descent.
 *
 * <p>Each element is parsed as: {@code <}, name, zero or more
 * {@code name = value} properties, then either {@code />} or {@code >}
 * followed by text and/or child elements and a matching {@code </name>}.
 */
public class TreeBuilder {
    private Node root;
    private List<Token> tokens;
    private int tokenIdx;

    /**
     * Parses the tokens into a tree; the result is available via {@link #getRoot()}.
     *
     * @param tokens the tokens to parse
     * @throws Exception if the token sequence does not match the expected structure
     *                   or ends prematurely
     */
    public TreeBuilder(List<Token> tokens) throws Exception {
        this.tokens = tokens;
        this.tokenIdx = 0;

        root = new Node();
        parseNode(root);
    }

    /** Parses one complete element, starting at its {@code <}, into {@code parent}. */
    private void parseNode(Node parent) throws Exception {
        expect(Token.TYPE_OPEN_ANGLEBRACKET, "'<'");

        // Node name
        parent.setName(expect(Token.TYPE_TEXT, "text").getText());

        parseProperties(parent);

        Token token = fetchToken();
        if (token.getType() == Token.TYPE_EMPTY_CLOSE) {
            // Element ends here: no children, no text.
            return;
        }
        if (token.getType() != Token.TYPE_CLOSE_ANGLEBRACKET) {
            // Start tag was not terminated properly.
            throw unexpected("'>'", token);
        }

        // Text and/or children follow.
        parseContent(parent);
        parseClosingTag(parent);
    }

    /** Reads {@code name = value} triples until the next token is not text. */
    private void parseProperties(Node parent) throws Exception {
        for (;;) {
            Token token = fetchToken();
            if (token.getType() != Token.TYPE_TEXT) {
                // Not a property: push the token back and stop.
                returnToken();
                return;
            }
            String name = token.getText();

            expect(Token.TYPE_EQUAL, "=");
            String value = expect(Token.TYPE_TEXT, "text").getText();

            parent.addProperty(new Property(name, value));
        }
    }

    /** Reads text tokens and child elements until some other token is seen. */
    private void parseContent(Node parent) throws Exception {
        for (;;) {
            Token token = fetchToken();
            if (token.getType() == Token.TYPE_TEXT) {
                appendText(parent, token.getText());
            } else if (token.getType() == Token.TYPE_OPEN_ANGLEBRACKET) {
                // Child element: attach it, push '<' back and recurse.
                Node child = new Node();
                parent.addChild(child);
                returnToken();
                parseNode(child);
            } else {
                // Neither text nor a child: push the token back and stop.
                returnToken();
                return;
            }
        }
    }

    /**
     * Adds a piece of text to the node. Successive pieces are joined with a single
     * space so that multi-word content is kept rather than overwritten.
     */
    private void appendText(Node parent, String piece) {
        String existing = parent.getText();
        parent.setText(existing == null ? piece : existing + " " + piece);
    }

    /** Consumes {@code </name>} and verifies the name matches the element being closed. */
    private void parseClosingTag(Node parent) throws Exception {
        expect(Token.TYPE_END_OPEN, "'</'");

        Token token = expect(Token.TYPE_TEXT, "text");
        String name = token.getText();
        if (!name.equals(parent.getName())) {
            throw new Exception("Expected node name:" + parent.getName() + " actual:" + name + " " + token);
        }

        expect(Token.TYPE_CLOSE_ANGLEBRACKET, "'>'");
    }

    /** Fetches the next token and fails unless it has the given type. */
    private Token expect(int type, String expected) throws Exception {
        Token token = fetchToken();
        if (token.getType() != type) {
            throw unexpected(expected, token);
        }
        return token;
    }

    /** Builds the "expected X, got Y" exception for a mismatching token. */
    private Exception unexpected(String expected, Token actual) {
        return new Exception("Expected:" + expected + " actual:" + actual.getText() + " " + actual);
    }

    /**
     * Returns the next token and advances.
     *
     * @throws Exception if the token list is exhausted
     */
    private Token fetchToken() throws Exception {
        if (tokenIdx >= tokens.size()) {
            throw new Exception("Unexpected end of input: no more tokens after #" + tokenIdx);
        }
        Token t = tokens.get(tokenIdx);
        tokenIdx++;
        return t;
    }

    /** Pushes the most recently fetched token back. */
    private void returnToken() {
        if (tokenIdx > 0) {
            tokenIdx--;
        }
    }

    public Node getRoot() {
        return root;
    }
}
最后整合调用:
package com.heyang; import com.heyang.util.BracketChecker; import com.heyang.util.CommonUtil; import com.heyang.util.Renderer; public class EntryPoint { public static void main(String[] args) { try { // Read context from file String jsonTxt=CommonUtil.readTextFromFile("C:\hy\files\xml\01.xml"); System.out.println("原文="+jsonTxt); // Is brackets balanced BracketChecker checker=new BracketChecker(); boolean isBalanced=checker.isBalanced(jsonTxt); if(isBalanced==false) { System.out.println(Renderer.paintBrown(checker.getErrMsg())); return; } // Parse json to tokens Lexer lex=new Lexer(jsonTxt); //System.out.println("紧缩文本="+lex.getCompactJsonTxt()); //lex.printTokens(); // Build tree TreeBuilder builder=new TreeBuilder(lex.getTokens()); Node root=builder.getRoot(); System.out.println("整形后文本: "+root); }catch(Exception ex) { System.out.println(Renderer.paintBrown(ex.getMessage())); ex.printStackTrace(); } } }
整形效果:
原文=<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.heyang</groupId> <artifactId>XmlAnalyzer</artifactId> <version>1.00</version> <dependencies> <dependency> <groupId>ch.qos.logback</groupId> <artifactId>logback-classic</artifactId> <version>1.1.11</version> </dependency> <dependency> <groupId>ch.qos.logback</groupId> <artifactId>logback-core</artifactId> <version>1.1.11</version> </dependency> <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> <version>2.6</version> </dependency> </dependencies></project> 整形后文本: <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> <artifactId>XmlAnalyzer</artifactId> <dependencies> <dependency> <artifactId>logback-classic</artifactId> <groupId>ch.qos.logback</groupId> <version>1.1.11</version> </dependency> <dependency> <artifactId>logback-core</artifactId> <groupId>ch.qos.logback</groupId> <version>1.1.11</version> </dependency> <dependency> <artifactId>commons-lang</artifactId> <groupId>commons-lang</groupId> <version>2.6</version> </dependency> </dependencies> <groupId>com.heyang</groupId> <modelVersion>4.0.0</modelVersion> <version>1.00</version> </project>
2020-5-22 解析算术表达式
2020-5-25 解析Json
2020-5-26 解析XML
感觉编译器/解释器的路越走越宽了.
--2020年5月26日--