zoukankan      html  css  js  c++  java
  • Java快速读取大文件

    Java快速读取大文件

    最近公司服务器监控系统需要做一个东西来分析Java应用程序的日志。

    第一步探索:

        首先我想到的是使用RandomAccessFile,因为它可以很方便地获取和设置文件指针,下面是我的代码。

    package cn.mucang.exception.analyzer;
    
    import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
    import cn.mucang.exception.analyzer.config.AnalyseConfig;
    import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.RandomAccessFile;
    
    /**
     * Log analysis entry point, first attempt: reads the log through a
     * {@link RandomAccessFile} so that reading can start at a saved file pointer.
     *
     * @author Gao Youbo
     * @since 2015/3/16.
     */
    public class LogUtils {

        private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class);

        /**
         * Reads the log file named by the analyzer's configuration, starting at the
         * configured file pointer, groups physical lines into log entries and hands
         * every completed entry to {@code analyzer.analyse(...)}.
         *
         * <p>A negative configured file pointer is reset to 0. The file length (in
         * bytes) is stored back into the configuration before reading starts.
         *
         * @param analyzer the analyzer that supplies the configuration and receives the entries
         * @throws IOException              if the log file does not exist or cannot be read
         * @throws IllegalArgumentException if the configured file pointer lies beyond the end of the file
         */
        public static void analyse(LogAnalyzer analyzer) throws IOException {
            AnalyseConfig analyseConfig = analyzer.getAnalyseConfig();
            File file = new File(analyseConfig.getPath());
            LOG.info("开始分析日志文件...{}", file.getAbsolutePath());
            if (!file.exists()) {
                throw new IOException("日志文件不存在:" + analyseConfig);
            }
            if (analyseConfig.getFilePointer() < 0) {
                analyseConfig.setFilePointer(0);
            }

            // Only the RandomAccessFile is opened. The previous version also opened a
            // FileInputStream/BufferedReader chain that was never closed and whose
            // readLine() result was thrown away on every iteration.
            try (RandomAccessFile logFile = new RandomAccessFile(file, "r")) {
                long length = logFile.length();
                analyzer.getAnalyseConfig().setFileLenght(length); // record the file length in bytes
                if (analyseConfig.getFilePointer() > length) {
                    throw new IllegalArgumentException("开始指针位置越界");
                }
                logFile.seek(analyseConfig.getFilePointer());

                String line; // current physical line
                int lineNumber = analyseConfig.getLineNumber(); // running line number
                DefaultLogLineBuilder lb = null; // entry currently being assembled
                long start = System.currentTimeMillis();
                while ((line = logFile.readLine()) != null) {
                    lineNumber++;
                    long filePointer = logFile.getFilePointer();
                    // presumably isNewLine reports whether this line starts a new log entry
                    // (as opposed to a continuation such as a stack-trace line) - confirm in ParseUtils
                    if (ParseUtils.isNewLine(lineNumber, line)) {
                        if (lb != null) {
                            analyzer.analyse(lb.getLogLine());
                        }
                        lb = new DefaultLogLineBuilder();
                    }
                    // Lines seen before the first entry start are dropped (lb is still null).
                    if (lb != null) {
                        lb.append(lineNumber, filePointer, line);
                    }
                    if (lineNumber % 10000 == 0) {
                        long end = System.currentTimeMillis();
                        System.out.println(String.format("line=%s, used=%sms", lineNumber, end - start));
                        start = System.currentTimeMillis();
                    }
                }
                // Flush the last entry once the file is exhausted. Doing this after the loop
                // (instead of comparing the pointer with the length sampled above) still works
                // when the file grows while it is being read.
                if (lb != null) {
                    analyzer.analyse(lb.getLogLine());
                }
            }
        }

    }

    下面看一下性能:分析一万行日志平均需要1500毫秒。因为我的日志分析用到了正则,一开始我以为速度慢是大量的正则运算造成的。后来发现真正的瓶颈在于RandomAccessFile.readLine()没有缓冲,每读一个字节都要发起一次底层读取。

    第二步探索:

    我自己写了一个LogReader,自己控制指针位置。下面看一下代码:

    package cn.mucang.exception.analyzer;
    
    import java.io.BufferedReader;
    import java.io.Closeable;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStreamReader;
    
    /**
     * Buffered line reader for log files that keeps track of how far it has read.
     *
     * <p>The file is decoded with the platform default charset. The position
     * reported by {@link #getFilePointer()} counts decoded <em>chars</em>, so it
     * equals the byte offset only for single-byte encodings.
     *
     * @author Gao Youbo
     * @since 2015-03-25 09:02
     */
    public class LogReader implements Closeable {
        /**
         * File size in bytes, sampled when the reader is opened.
         */
        private final long length;
        /**
         * Number of chars consumed (read or skipped) so far.
         */
        private long filePointer;
        private final BufferedReader bufferedReader;

        /**
         * Opens the given log file for buffered reading.
         *
         * @param logFile the file to read
         * @throws FileNotFoundException if the file cannot be opened for reading
         */
        public LogReader(File logFile) throws FileNotFoundException {
            FileInputStream inputStream = new FileInputStream(logFile);
            this.bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
            this.length = logFile.length();
        }

        /**
         * Reads a single char.
         *
         * @return the char read, or -1 at end of file
         * @throws IOException if reading fails
         */
        public int read() throws IOException {
            int c = bufferedReader.read();
            if (c != -1) {
                // Only advance on real data; hitting EOF must not move the pointer.
                filePointer++;
            }
            return c;
        }

        /**
         * Reads the next line. A line ends at {@code '\n'}, {@code '\r'},
         * {@code "\r\n"} or end of file; the terminator is not part of the result.
         *
         * @return the line without its terminator, or {@code null} if the end of
         *         the file was reached before any char could be read
         * @throws IOException if reading fails
         */
        public String readLine() throws IOException {
            StringBuilder input = new StringBuilder();
            int c = -1;
            boolean eol = false; // end of line
            while (!eol) {
                switch (c = read()) {
                    case -1:
                    case '\n':
                        eol = true;
                        break;
                    case '\r':
                        eol = true;
                        // Peek one char: swallow the LF of a CRLF pair, otherwise
                        // push the char back so it starts the next line.
                        bufferedReader.mark(1);
                        int next = bufferedReader.read();
                        if (next == '\n') {
                            filePointer++;
                        } else if (next != -1) {
                            bufferedReader.reset();
                        }
                        break;
                    default:
                        input.append((char) c);
                        break;
                }
            }
            if ((c == -1) && (input.length() == 0)) {
                return null;
            }
            return input.toString();
        }

        /**
         * Returns the current read position.
         *
         * @return number of chars consumed so far, including skipped ones
         * @throws IOException never thrown by this implementation; kept for interface compatibility
         */
        public long getFilePointer() throws IOException {
            return filePointer;
        }

        /**
         * Skips up to n chars from the current position.
         *
         * @param n number of chars to skip
         * @return the number of chars actually skipped
         * @throws IOException if reading fails
         */
        public long skip(long n) throws IOException {
            // Skip through the buffered reader (not the stream beneath it) so the
            // buffer stays consistent, and account for the skipped chars.
            long skipped = bufferedReader.skip(n);
            filePointer += skipped;
            return skipped;
        }

        /**
         * Returns the size of the log file.
         *
         * @return file size in bytes as sampled when the reader was opened
         */
        public long length() {
            return length;
        }

        /**
         * Closes the reader and, through it, the underlying file stream.
         *
         * @throws IOException if closing fails
         */
        @Override
        public void close() throws IOException {
            bufferedReader.close();
        }
    }
    
    package cn.mucang.exception.analyzer;
    
    import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
    import cn.mucang.exception.analyzer.config.AnalyseConfig;
    import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Log analysis entry point, second attempt: reads the log through the
     * buffered {@code LogReader} instead of a {@code RandomAccessFile}.
     *
     * @author Gao Youbo
     * @since 2015/3/16.
     */
    public class LogUtils {

        private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class);


        /**
         * Reads the log file named by the analyzer's configuration, skipping ahead
         * by the configured file pointer, groups physical lines into log entries
         * and hands every completed entry to {@code analyzer.analyse(...)}.
         *
         * <p>A negative configured file pointer is reset to 0. The file length
         * reported by the reader is stored back into the configuration before
         * reading starts.
         *
         * @param analyzer the analyzer that supplies the configuration and receives the entries
         * @throws java.io.IOException      if the log file does not exist or cannot be read
         * @throws IllegalArgumentException if the configured file pointer lies beyond the end of the file
         */
        public static void analyse(LogAnalyzer analyzer) throws IOException {
            AnalyseConfig analyseConfig = analyzer.getAnalyseConfig();
            File file = new File(analyseConfig.getPath());
            System.out.println(file.getAbsolutePath());
            LOG.info("开始分析日志文件...{}", file.getAbsolutePath());
            if (!file.exists()) {
                throw new IOException("日志文件不存在:" + analyseConfig);
            }
            if (analyseConfig.getFilePointer() < 0) {
                analyseConfig.setFilePointer(0);
            }
            try (LogReader logReader = new LogReader(file)) {
                long length = logReader.length();
                analyzer.getAnalyseConfig().setFileLenght(length); // record the file length
                if (analyseConfig.getFilePointer() > length) {
                    throw new IllegalArgumentException("开始指针位置越界");
                }
                logReader.skip(analyseConfig.getFilePointer());

                String line; // current physical line
                int lineNumber = analyseConfig.getLineNumber(); // running line number
                DefaultLogLineBuilder lb = null; // entry currently being assembled
                long start = System.currentTimeMillis();
                while ((line = logReader.readLine()) != null) {
                    lineNumber++;
                    // Position as reported by the reader after consuming this line.
                    long filePointer = logReader.getFilePointer();
                    // presumably isNewLine reports whether this line starts a new log entry
                    // (as opposed to a continuation such as a stack-trace line) - confirm in ParseUtils
                    if (ParseUtils.isNewLine(lineNumber, line)) {
                        if (lb != null) {
                            analyzer.analyse(lb.getLogLine());
                        }
                        lb = new DefaultLogLineBuilder();
                    }
                    // Lines seen before the first entry start are dropped (lb is still null).
                    if (lb != null) {
                        lb.append(lineNumber, filePointer, line);
                    }
                    if (lineNumber % 10000 == 0) {
                        long end = System.currentTimeMillis();
                        System.out.println(String.format("line=%s, used=%s", lineNumber, end - start));
                        start = System.currentTimeMillis();
                    }
                }
                // Flush the last entry once the reader is exhausted. The earlier check
                // "length == filePointer" compared a byte length with the reader's own
                // position counter and could miss the end of the file, silently dropping
                // the final entry; end of input is the reliable signal.
                if (lb != null) {
                    analyzer.analyse(lb.getLogLine());
                }
            }
        }
    }
    接下来是测试的性能:

    日志解析速度提高了10倍。

  • 相关阅读:
    unsupported jsonb version number 123
    如何在MPlayer上支持RTSP
    TDengine 时序数据库的 ADO.Net Core 提供程序 Maikebing.EntityFrameworkCore.Taos
    如何使用IoTSharp对接ModBus?
    如何从源码启动和编译IoTSharp
    Asp.Net Core 自动适应Windows服务、Linux服务、手动启动时的内容路径的扩展方法
    MQTTnet 的Asp.Net Core 认证事件的扩展
    Asp.Net Core 中利用QuartzHostedService 实现 Quartz 注入依赖 (DI)
    The remote certificate is invalid according to the validation procedure 远程证书验证无效
    settings插拔式源码
  • 原文地址:https://www.cnblogs.com/firstdream/p/5585280.html
Copyright © 2011-2022 走看看