zoukankan      html  css  js  c++  java
  • DOM解析xml

    虽然DOM方式解析xml的代码比较简洁,适合解析小文件,但对于大文件,还是建议使用SAX进行解析:解析速度不只是快10倍那么简单,简直是快百倍不止。

    text.xml文件

    <?xml version="1.0" standalone="yes"?>
    <RECORDS>
    <RECORD>
    <接收时间>2017/2/6 11:19:49</接收时间>
    <来源地址>19.128.116.99</来源地址>
    <源> 460c5401 Security@FLOW: UDP flood attack:丢弃!trust::ethernet0/8 19.129.142.104</源>
    <目地>180.154.15.198</目地>
    </RECORD>
    <RECORD>
    <接收时间>2017/2/6 11:19:49</接收时间>
    <来源地址>19.128.116.99</来源地址>
    <源> 460c5403 Security@FLOW: UDP flood attack:丢弃!trust::ethernet0/8 19.130.4.8</源>
    <目地>19.16.25.120。发生了168次(在前43秒内)。</目地>
    </RECORD>

    </RECORDS>

    1.DOM方式解析

    package cn.mym.sysi.logread;


    import java.io.File;
    import java.io.Serializable;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;

    import org.w3c.dom.Document;
    import org.w3c.dom.NodeList;

    public class XML_LOG_READ_IN implements Serializable {

    /**
    * XML内网日志解析
    */
    private static final long serialVersionUID = -2653100457283598999L;
    public static List<Map<String, String>> xmlToString(File file) {
    List<Map<String, String>> list = new ArrayList<Map<String, String>>();
    try {

    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = dbFactory.newDocumentBuilder();
    Document doc = docBuilder.parse(file);
    NodeList nodeList = doc.getElementsByTagName("RECORD");
    for(int i=0; i< nodeList.getLength(); i++){
    Map<String, String> map = new HashMap<String, String>();
    String receiveTime = doc.getElementsByTagName("接收时间").item(i).getFirstChild().getNodeValue();
    String time = receiveTime.replace("/", "-");
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    Date date = sdf.parse(time);
    map.put("time", sdf.format(date));
    String Ip = doc.getElementsByTagName("来源地址").item(i).getFirstChild().getNodeValue();
    map.put("ip", Ip);
    String source = doc.getElementsByTagName("源").item(i).getFirstChild().getNodeValue();
    // mac地址
    String mac = source.substring(0, 9).trim();
    map.put("mac", mac);
    // 攻击类型
    String[] str = source.split("Security@FLOW:")[1].split(":");
    String attackType = str[0].trim();
    map.put("attackType", attackType);
    // 交换机端口
    String[] str2 = source.split("trust::")[1].split(" ");
    String ethernet = str2[0].trim();
    map.put("ethernet", ethernet);
    // 源IP
    String[] str3 = str2[1].trim().split(" ");
    String sourceIp = str3[0].trim();
    map.put("sourceIp", sourceIp);
    String target = doc.getElementsByTagName("目地").item(i).getFirstChild().getNodeValue();
    String str4[];
    String s1;
    String targetIp;
    String attackNum = null;
    if(target.contains("‚")){
    str4 = target.trim().split("‚");
    s1 = str4[0].trim();
    targetIp = s1.substring(0, s1.length()-2);
    String[] s2 =str4[1].split("†")[1].split("¡");
    attackNum = s2[0].substring(0, s2[0].length()-2);
    }else{
    s1 = target.trim();
    targetIp = s1;
    }
    map.put("targetIp", targetIp);
    map.put("attackNum", attackNum);
    list.add(map);
    }
    System.out.println("xml文件解析完成");
    }catch(Exception e) {
    e.printStackTrace();
    }
    return list;
    }
    public static void main(String[] args) {
    List<Map<String, String>> list = new ArrayList<Map<String,String>>();
    File file = new File("D:\text.xml");
    long beginTime = System.currentTimeMillis();
    list = XML_LOG_READ_IN.xmlToString(file);
    long endTime = System.currentTimeMillis();
    System.out.println("解析完成,时间:"+(endTime-beginTime)/100+"秒");
    }
    }

    2.SAX方式解析xml

    /**
     * SAX handler that turns each {@code RECORD} element of the intranet security log into a
     * {@code Map} with the keys {@code time}, {@code ip}, {@code mac}, {@code attackType},
     * {@code ethernet}, {@code sourceIp}, {@code targetIp} and {@code attackNum}.
     *
     * <p>A handler instance keeps parse state in its fields, so it must not be shared by
     * concurrent parses.
     */
    public class InnerXmlSAXToString extends DefaultHandler {

        /**
         * Marker triples {end of target IP, start of count, end of count} recognised in the
         * target text, e.g. {@code 19.16.25.120。发生了168次(...)}.
         *
         * <p>The second row is what the UTF-8 bytes of the first row look like when decoded as
         * windows-1252: the last byte of each character becomes the marker and the two leading
         * bytes remain as two junk characters in front of it.
         * NOTE(review): the previous code matched only the second row and cut two trailing
         * characters, which suggests the text used to arrive mis-decoded - confirm whether that
         * input still occurs before removing the row.
         */
        private static final String[][] TARGET_MARKERS = {
            {"。", "了", "次"},
            {"\u201A", "\u2020", "\u00A1"},
        };

        /** Junk characters left in front of each marker, per row of {@link #TARGET_MARKERS}. */
        private static final int[] MARKER_RESIDUE = {0, 2};

        /** Records completed so far in the current document. */
        List<Map<String, String>> list = new ArrayList<Map<String, String>>();

        /** Record currently being filled. */
        Map<String, String> map = new HashMap<String, String>();

        /** Name of the element whose start tag was seen last; null once an end tag was seen. */
        private String preTag = null;

        /** Text collected for {@link #preTag}; one text node may arrive in several chunks. */
        private final StringBuilder text = new StringBuilder();

        /** Parses {@code D:\text.xml}, prints time/ip/mac per record and the elapsed seconds. */
        public static void main(String[] args) {
            int size = 0;
            long beginTime = System.currentTimeMillis();
            try {
                // The backslash must be escaped: "\t" inside a string literal is a tab.
                File file = new File("D:\\text.xml");
                FileInputStream in = new FileInputStream(file);
                try {
                    InputSource inputSource = new InputSource(in);
                    inputSource.setEncoding("UTF-8");
                    InnerXmlSAXToString reader = new InnerXmlSAXToString();
                    List<Map<String, String>> list = reader.getList(inputSource);
                    size = list.size();
                    for (Map<String, String> obj : list) {
                        System.out.println(obj.get("time") + "," + obj.get("ip") + "," + obj.get("mac"));
                    }
                } finally {
                    in.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            long endTime = System.currentTimeMillis();
            long time = endTime - beginTime;
            System.out.println("时间:" + time / 1000 + "秒" + size);
        }

        /**
         * Parses {@code inputSource} with this handler and returns the records found.
         *
         * @param inputSource XML to parse; the caller owns and closes any underlying stream
         * @return the parsed records, also available afterwards through {@link #getList()}
         * @throws Exception if the parser cannot be created or the input cannot be parsed
         */
        public List<Map<String, String>> getList(InputSource inputSource) throws Exception {
            SAXParserFactory sf = SAXParserFactory.newInstance();
            SAXParser sp = sf.newSAXParser();
            sp.parse(inputSource, this);
            return getList();
        }

        /** Returns the records collected by the most recent parse. */
        public List<Map<String, String>> getList() {
            return list;
        }

        @Override
        public void startDocument() throws SAXException {
            list = new ArrayList<Map<String, String>>();
            preTag = null;
            text.setLength(0);
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // Only buffer here: the parser may split one text node over several calls, so the
            // value is complete only when the end tag arrives.
            if (preTag != null) {
                text.append(ch, start, length);
            }
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attrs) {
            if ("RECORD".equals(qName)) {
                map = new HashMap<String, String>();
            }
            preTag = qName;
            // Drop whatever was buffered for the enclosing element (indentation whitespace).
            text.setLength(0);
        }

        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            // preTag is non-null only when no other tag was seen since this element's start
            // tag, i.e. the buffer holds exactly this element's text.
            if (preTag != null && text.length() > 0) {
                storeField(preTag, text.toString());
            }
            text.setLength(0);
            if ("RECORD".equals(qName)) {
                list.add(map);
            }
            // Reset so that the whitespace between this end tag and the next start tag is not
            // attributed to the element that has just been closed.
            preTag = null;
        }

        /**
         * Converts the complete text of one field element into map entries. Elements other
         * than the four known fields are ignored. A malformed value is reported on stderr and
         * does not abort the parse.
         */
        private void storeField(String tag, String value) {
            try {
                if (tag.equals("接收时间")) {
                    // "2017/2/6 11:19:49" -> "2017-02-06 11:19:49"
                    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    Date date = sdf.parse(value.replace("/", "-"));
                    map.put("time", sdf.format(date));
                } else if (tag.equals("来源地址")) {
                    map.put("ip", value);
                } else if (tag.equals("源")) {
                    // First nine characters of the source text (the original comment calls
                    // this the MAC address).
                    map.put("mac", value.substring(0, 9).trim());
                    // Attack type: text between "Security@FLOW:" and the next ':'.
                    String[] afterFlow = value.split("Security@FLOW:")[1].split(":");
                    map.put("attackType", afterFlow[0].trim());
                    // Switch port: first token after "trust::".
                    String[] afterTrust = value.split("trust::")[1].split(" ");
                    map.put("ethernet", afterTrust[0].trim());
                    // Source IP: token following the switch port.
                    String[] afterPort = afterTrust[1].trim().split(" ");
                    map.put("sourceIp", afterPort[0].trim());
                } else if (tag.equals("目地")) {
                    String[] target = parseTarget(value);
                    map.put("targetIp", target[0]);
                    map.put("attackNum", target[1]);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        /**
         * Splits the target text into {@code {targetIp, attackNum}}.
         *
         * <p>Text without any end-of-IP marker is returned whole as the IP with a {@code null}
         * count; a missing count is reported as {@code null} rather than as an exception.
         */
        private static String[] parseTarget(String target) {
            String trimmed = target.trim();
            for (int row = 0; row < TARGET_MARKERS.length; row++) {
                String[] markers = TARGET_MARKERS[row];
                int ipEnd = trimmed.indexOf(markers[0]);
                if (ipEnd < 0) {
                    continue;
                }
                int residue = MARKER_RESIDUE[row];
                String ip = dropTail(trimmed.substring(0, ipEnd).trim(), residue);
                String count = null;
                int open = trimmed.indexOf(markers[1], ipEnd);
                int close = open < 0 ? -1 : trimmed.indexOf(markers[2], open + 1);
                if (close > open) {
                    count = dropTail(trimmed.substring(open + 1, close), residue).trim();
                }
                return new String[] {ip, count};
            }
            return new String[] {trimmed, null};
        }

        /** Removes up to {@code n} trailing characters from {@code s}. */
        private static String dropTail(String s, int n) {
            return s.substring(0, Math.max(0, s.length() - n));
        }

        /**
         * Runs the given file through the SAX handler and discards the result.
         *
         * <p>Kept only for source compatibility: this method has always returned {@code null}.
         * The earlier body also built a full DOM and SAX-parsed a hard-coded path instead of
         * {@code fileName}; both results were thrown away.
         *
         * @param fileName XML file to parse
         * @return always {@code null}
         * @deprecated use {@link #getList(InputSource)} to obtain the parsed records
         */
        @Deprecated
        public static String parserXml(File fileName) {
            try {
                SAXParserFactory sf = SAXParserFactory.newInstance();
                SAXParser sp = sf.newSAXParser();
                sp.parse(fileName, new InnerXmlSAXToString());
            } catch (Exception e) {
                e.printStackTrace();
            }
            return null;
        }
    }

    为了避免编码问题,我选择使用InputSource,方便对编码进行设置。

    使用XMLReader解析

    // 1. Create a SAXParserFactory.

    SAXParserFactory factory = SAXParserFactory.newInstance();

    // 2. Have the factory produce a SAXParser.

    SAXParser parser = factory.newSAXParser();

    // 3. Obtain an XMLReader from the SAXParser.

    XMLReader reader = parser.getXMLReader();

    // 4. Create the content handler.

    SaxHandler saxHandler = new SaxHandler();

    // 5. Register the handler with the XMLReader; the ContentHandler is usually the most important one.

    reader.setContentHandler(saxHandler);

    // 6. Open the XML document as a stream, wrap it in an InputSource and start parsing.
    //    The stream is closed afterwards even if parsing fails.

    FileInputStream xmlStream = new FileInputStream("src/com/andieguo/saxparserdemo/books.xml");
    try {
        reader.parse(new InputSource(xmlStream));
    } finally {
        xmlStream.close();
    }

    使用SAXParser解析

    // 1. Create the parser factory.

    SAXParserFactory parserFactory = SAXParserFactory.newInstance();

    // 2. Ask the factory for a parser.

    SAXParser xmlParser = parserFactory.newSAXParser();

    // 3. Create the content handler that receives the parse events.

    SaxHandler eventHandler = new SaxHandler();

    // 4. Parse the XML file, delivering its events to the handler.

    File booksXml = new File("src/com/andieguo/saxparserdemo/books.xml");
    xmlParser.parse(booksXml, eventHandler);

  • 相关阅读:
    获取当前时区时间
    python lambda表达式详解
    Odoo 12开发之开发环境准备
    初步了解odoo12
    web前端面试题
    实现一个优先级队列
    面试题
    python读取和生成excel文件
    Django基础
    virtualenv
  • 原文地址:https://www.cnblogs.com/ouyanxia/p/6410251.html
Copyright © 2011-2022 走看看