zoukankan      html  css  js  c++  java
  • DOM解析xml

    虽然DOM方式解析xml的代码比较简洁,适合解析小文件,但对于大文件,还是建议使用SAX进行解析:解析速度不只是快10倍那么简单,简直是快百倍不止。

    text.xml文件

    <?xml version="1.0" standalone="yes"?>
    <RECORDS>
    <RECORD>
    <接收时间>2017/2/6 11:19:49</接收时间>
    <来源地址>19.128.116.99</来源地址>
    <源> 460c5401 Security@FLOW: UDP flood attack:丢弃!trust::ethernet0/8 19.129.142.104</源>
    <目地>180.154.15.198</目地>
    </RECORD>
    <RECORD>
    <接收时间>2017/2/6 11:19:49</接收时间>
    <来源地址>19.128.116.99</来源地址>
    <源> 460c5403 Security@FLOW: UDP flood attack:丢弃!trust::ethernet0/8 19.130.4.8</源>
    <目地>19.16.25.120。发生了168次(在前43秒内)。</目地>
    </RECORD>

    </RECORDS>

    1.DOM方式解析

    package cn.mym.sysi.logread;


    import java.io.File;
    import java.io.Serializable;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;

    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.NodeList;

    public class XML_LOG_READ_IN implements Serializable {

    /**
    * XML内网日志解析
    */
    private static final long serialVersionUID = -2653100457283598999L;
    public static List<Map<String, String>> xmlToString(File file) {
    List<Map<String, String>> list = new ArrayList<Map<String, String>>();
    try {

    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = dbFactory.newDocumentBuilder();
    Document doc = docBuilder.parse(file);
    NodeList nodeList = doc.getElementsByTagName("RECORD");
    for(int i=0; i< nodeList.getLength(); i++){
    Map<String, String> map = new HashMap<String, String>();
    String receiveTime = doc.getElementsByTagName("接收时间").item(i).getFirstChild().getNodeValue();
    String time = receiveTime.replace("/", "-");
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    Date date = sdf.parse(time);
    map.put("time", sdf.format(date));
    String Ip = doc.getElementsByTagName("来源地址").item(i).getFirstChild().getNodeValue();
    map.put("ip", Ip);
    String source = doc.getElementsByTagName("源").item(i).getFirstChild().getNodeValue();
    // mac地址
    String mac = source.substring(0, 9).trim();
    map.put("mac", mac);
    // 攻击类型
    String[] str = source.split("Security@FLOW:")[1].split(":");
    String attackType = str[0].trim();
    map.put("attackType", attackType);
    // 交换机端口
    String[] str2 = source.split("trust::")[1].split(" ");
    String ethernet = str2[0].trim();
    map.put("ethernet", ethernet);
    // 源IP
    String[] str3 = str2[1].trim().split(" ");
    String sourceIp = str3[0].trim();
    map.put("sourceIp", sourceIp);
    String target = doc.getElementsByTagName("目地").item(i).getFirstChild().getNodeValue();
    String str4[];
    String s1;
    String targetIp;
    String attackNum = null;
    if(target.contains("‚")){
    str4 = target.trim().split("‚");
    s1 = str4[0].trim();
    targetIp = s1.substring(0, s1.length()-2);
    String[] s2 =str4[1].split("†")[1].split("¡");
    attackNum = s2[0].substring(0, s2[0].length()-2);
    }else{
    s1 = target.trim();
    targetIp = s1;
    }
    map.put("targetIp", targetIp);
    map.put("attackNum", attackNum);
    list.add(map);
    }
    System.out.println("xml文件解析完成");
    }catch(Exception e) {
    e.printStackTrace();
    }
    return list;
    }
    public static void main(String[] args) {
    List<Map<String, String>> list = new ArrayList<Map<String,String>>();
    File file = new File("D:\text.xml");
    long beginTime = System.currentTimeMillis();
    list = XML_LOG_READ_IN.xmlToString(file);
    long endTime = System.currentTimeMillis();
    System.out.println("解析完成,时间:"+(endTime-beginTime)/100+"秒");
    }
    }

    2.SAX方式解析xml

    /**
     * SAX handler that converts every {@code <RECORD>} element of an intranet security log into
     * a map with the keys {@code time}, {@code ip}, {@code mac}, {@code attackType},
     * {@code ethernet}, {@code sourceIp}, {@code targetIp} and {@code attackNum}.
     *
     * <p>A handler instance keeps parsing state in its fields and is therefore not meant to be
     * used by several parses at the same time.
     */
    public class InnerXmlSAXToString extends DefaultHandler {
        /** Records completed so far, in document order. */
        List<Map<String, String>> list = new ArrayList<Map<String, String>>();
        /** Fields of the record that is currently being read. */
        Map<String, String> map = new HashMap<String, String>();
        /** Name of the element whose text is being collected, or null between elements. */
        private String preTag = null;
        /** Text collected for {@code preTag}; a parser may deliver it in several chunks. */
        private final StringBuilder text = new StringBuilder();

        /**
         * Parses the file given as first argument (default {@code D:\text.xml}) as UTF-8, prints
         * time, ip and mac of every record and finally the elapsed seconds and record count.
         */
        public static void main(String[] args) {
            int size = 0;
            long beginTime = System.currentTimeMillis();
            try {
                // Backslash must be escaped: "\t" in a Java string literal is a TAB character.
                File file = new File(args.length > 0 ? args[0] : "D:\\text.xml");
                FileInputStream in = new FileInputStream(file);
                try {
                    InputSource inputSource = new InputSource(in);
                    inputSource.setEncoding("UTF-8");
                    InnerXmlSAXToString reader = new InnerXmlSAXToString();
                    List<Map<String, String>> list = reader.getList(inputSource);
                    size = list.size();
                    for (Map<String, String> obj : list) {
                        System.out.println(obj.get("time") + "," + obj.get("ip") + "," + obj.get("mac"));
                    }
                } finally {
                    in.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            long endTime = System.currentTimeMillis();
            long time = endTime - beginTime;
            System.out.println("时间:" + time / 1000 + "秒" + size);
        }

        /**
         * Parses the given source with this handler.
         *
         * @param inputSource the XML input
         * @return the records found, in document order
         * @throws Exception if the parser cannot be created or the input cannot be parsed
         */
        public List<Map<String, String>> getList(InputSource inputSource) throws Exception {
            SAXParserFactory sf = SAXParserFactory.newInstance();
            SAXParser sp = sf.newSAXParser();
            sp.parse(inputSource, this);
            return getList();
        }

        /** Returns the records collected by the most recent parse. */
        public List<Map<String, String>> getList() {
            return list;
        }

        @Override
        public void startDocument() throws SAXException {
            list = new ArrayList<Map<String, String>>();
            map = new HashMap<String, String>();
            preTag = null;
            text.setLength(0);
        }

        /**
         * Buffers element text. SAX allows a parser to report one run of character data through
         * several calls, so the value is only interpreted in {@link #endElement}.
         */
        @Override
        public void characters(char ch[], int start, int length) throws SAXException {
            if (preTag != null) {
                text.append(ch, start, length);
            }
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attrs) {
            if ("RECORD".equals(qName)) {
                map = new HashMap<String, String>();
            }
            preTag = qName;
            text.setLength(0);
        }

        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            // Elements without any text are ignored, exactly as when characters() is never called.
            if (preTag != null && preTag.equals(qName) && text.length() > 0) {
                storeField(qName, text.toString());
            }
            if ("RECORD".equals(qName)) {
                list.add(map);
            }
            // Clearing preTag makes characters() ignore the whitespace between a closing tag and
            // the next opening tag, which would otherwise be taken as a field value.
            preTag = null;
            text.setLength(0);
        }

        /**
         * Interprets the complete text of one field element and stores the derived entries in the
         * current record. A value that cannot be parsed is reported on stderr and leaves the
         * corresponding keys unset; the record itself is still emitted.
         */
        private void storeField(String tag, String value) {
            try {
                if ("接收时间".equals(tag)) {
                    // "2017/2/6 11:19:49" -> "2017-02-06 11:19:49"
                    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    Date date = sdf.parse(value.replace("/", "-"));
                    map.put("time", sdf.format(date));
                } else if ("来源地址".equals(tag)) {
                    map.put("ip", value);
                } else if ("源".equals(tag)) {
                    // First nine characters of the message, blanks stripped (labelled "mac").
                    map.put("mac", value.substring(0, 9).trim());
                    // Attack type: text between "Security@FLOW:" and the next ':'.
                    String[] afterFlow = value.split("Security@FLOW:")[1].split(":");
                    map.put("attackType", afterFlow[0].trim());
                    // Switch port and source IP: first two space-separated tokens after "trust::".
                    String[] afterTrust = value.split("trust::")[1].split(" ");
                    map.put("ethernet", afterTrust[0].trim());
                    map.put("sourceIp", afterTrust[1].trim());
                } else if ("目地".equals(tag)) {
                    String[] target = parseTarget(value);
                    map.put("targetIp", target[0]);
                    map.put("attackNum", target[1]);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /**
         * Splits the target text into {@code {targetIp, attackNum}}.
         *
         * <p>Handles the plain form {@code "180.154.15.198"} (no count, {@code attackNum} is
         * {@code null}) and the form {@code "19.16.25.120。发生了168次(...)。"}. The previous
         * implementation only recognised the characters U+201A, U+2020 and U+00A1, which are what
         * the final UTF-8 bytes of 。, 了 and 次 become when the file is decoded as windows-1252;
         * that legacy form is still accepted for callers that feed mis-decoded text.
         */
        private static String[] parseTarget(String target) {
            String trimmed = target.trim();
            String targetIp = trimmed;
            String attackNum = null;
            int stop = trimmed.indexOf('。');
            if (stop >= 0) {
                targetIp = trimmed.substring(0, stop).trim();
                // The count sits between 了 and 次 in "发生了168次".
                int from = trimmed.indexOf('了', stop);
                int to = from < 0 ? -1 : trimmed.indexOf('次', from + 1);
                if (to > from) {
                    attackNum = trimmed.substring(from + 1, to).trim();
                }
            } else if (trimmed.contains("‚")) {
                // Legacy mis-decoded input: each marker is preceded by two junk characters
                // (the first two bytes of the same UTF-8 sequence), hence the "length - 2".
                String[] parts = trimmed.split("‚");
                String head = parts[0].trim();
                targetIp = head.substring(0, head.length() - 2);
                String[] tail = parts[1].split("†")[1].split("¡");
                attackNum = tail[0].substring(0, tail[0].length() - 2);
            }
            return new String[] {targetIp, attackNum};
        }

        /**
         * Runs a SAX parse over the given file and discards the result.
         *
         * <p>Kept for source compatibility. The earlier version parsed a hard-coded path instead
         * of {@code fileName} and computed values it never used; it always returned
         * {@code null}, and so does this one. Use {@link #getList(InputSource)} to obtain the
         * records.
         *
         * @param fileName the XML file to parse
         * @return always {@code null}
         */
        public static String parserXml(File fileName) {
            try {
                SAXParserFactory sf = SAXParserFactory.newInstance();
                SAXParser sp = sf.newSAXParser();
                sp.parse(fileName, new InnerXmlSAXToString());
            } catch (Exception e) {
                e.printStackTrace();
            }
            return null;
        }
    }

    为了避免编码问题,我选择使用InputSource,方便对编码进行设置。

    使用XMLReader解析

    // 1. Create a SAXParserFactory.

    SAXParserFactory factory = SAXParserFactory.newInstance();

    // 2. Let the factory produce a SAXParser.

    SAXParser parser = factory.newSAXParser();

    // 3. Obtain the XMLReader that backs the SAXParser.

    XMLReader reader = parser.getXMLReader();

    // 4. Create the content handler.

    SaxHandler saxHandler = new SaxHandler();

    // 5. Register the custom handler with the XMLReader; ContentHandler is usually the one that matters most.

    reader.setContentHandler(saxHandler);

    // 6. Wrap the XML file's InputStream in an InputSource and start parsing.
    //    ("new InputSource" needs the space after "new"; "newInputSource(...)" does not compile.)

    reader.parse(new InputSource(new FileInputStream("src/com/andieguo/saxparserdemo/books.xml")));

    使用SAXParser解析

    // Step 1: obtain a parser factory instance.

    SAXParserFactory factory = SAXParserFactory.newInstance();

    // Step 2: ask the factory for a parser.

    SAXParser parser = factory.newSAXParser();

    // Step 3: create the content handler that receives the SAX events.

    SaxHandler handler = new SaxHandler();

    // Step 4: parse the XML file, delivering its events to the handler.

    File booksXml = new File("src/com/andieguo/saxparserdemo/books.xml");
    parser.parse(booksXml, handler);

  • 相关阅读:
    Azure HPC Pack Cluster添加辅助节点
    Azure HPC Pack 辅助节点模板配置
    Azure HPC Pack配置管理系列(PART6)
    Windows HPC Pack 2012 R2配置
    Azure HPC Pack 节点提升成域控制器
    Azure HPC Pack VM 节点创建和配置
    Azure HPC Pack 部署必要条件准备
    Azure HPC Pack 基础拓扑概述
    Azure VM 性能计数器配置
    Maven私仓配置
  • 原文地址:https://www.cnblogs.com/ouyanxia/p/6410251.html
Copyright © 2011-2022 走看看