在网上找了很多关于解析超大xml的例子,都说java自带的jar包中有相关的SAXParser类可以解析xml,但是试过很多次之后还是不行;dom4j.jar等等也都不能解析条数太多的xml,文件大概超过30M就会解析报错。
不过偶然看到过xercesImpl.jar、sax2.jar、jaxen-1.1.1.jar这几个jar包,示例代码如下:
import java.io.IOException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import java.io.File; import org.xml.sax.SAXException; public class SAX { public static void main(String[] args) { try { SAXParserFactory factory=SAXParserFactory.newInstance(); factory.setNamespaceAware(true); factory.setValidating(true); SAXParser parser=factory.newSAXParser(); SAXparse p1=new SAXparse(); parser.parse(new File("D:\\dblp.xml"), p1); } catch (ParserConfigurationException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SAXException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
或者
import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; import java.util.List; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; /** * */ public class XMLParse { private String configName = "dblp_little.xml"; private SAXReader saxReader; private Document doc; private Element root; /** */ public XMLParse() { // InputStream in = Thread.currentThread().getContextClassLoader() // .getResourceAsStream(configName); saxReader = new SAXReader(); try { doc = saxReader.read(configName); } catch (DocumentException e) { e.printStackTrace(); } root = doc.getRootElement(); } /** * get Data Type * * @throws IOException */ public void getModelElement(String attribute) { FileWriter fileWriter = null; try { fileWriter = new FileWriter(attribute + ".txt"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } List list = root.elements(); Element model = null; List childList = null; Element modelEle = null; Element returnModel = null; String dataType = null; StringBuffer sb = new StringBuffer(); int temp = 0; for (Iterator it = list.iterator(); it.hasNext();) { model = (Element) it.next(); temp++; System.out.println("temp:"+temp); childList = model.elements(); for (Iterator ite = childList.iterator(); ite.hasNext();) { modelEle = (Element) ite.next(); if (attribute.equals(modelEle.getName())) { dataType = modelEle.getText(); dataType = dataType; if (sb.length() > 1) { sb.append(","); } sb.append(dataType); } } dataType = sb.toString(); if (!"".equals(dataType)) { // 没有值的话,跳过往txt中写值 try { fileWriter.write(dataType); fileWriter.write("\r\n"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } sb.delete(0, sb.length()); try { fileWriter.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } try { // fileWriter.flush(); fileWriter.close(); 
System.out.println("xml解析成功,并成功写入到"+attribute+".txt 文件中!"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("list.size:"+list.size()); } public static void main(String[] args) { // TODO Auto-generated method stub String attribute = null; XMLParse parse = new XMLParse(); attribute = "author"; parse.getModelElement(attribute); } }