即:去掉 XML 中所有的 <…> 标记(包括尖括号及其中包含的内容),然后统计剩余文本的长度。
import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; public class congTools { static Document doc; static String content; public congTools(String filename){ SAXReader reader = new SAXReader(); try { doc = reader.read(filename); } catch (DocumentException e) { e.printStackTrace(); } content=doc.getRootElement().asXML(); } public int getLength(String content){ String result=""; String[] results=content.split("<.*?>"); for(String a:results){ result+=a; } return result.length(); } /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub congTools cl=new congTools("src/00000000000052735781.xml"); int len=cl.getLength(cl.content); System.out.println(len); } }
split("<.*?>") 表示:以“以 < 开头、以 > 结尾的最短字符串”(.*? 为非贪婪匹配)作为分隔符,对内容进行切分。