zoukankan      html  css  js  c++  java
  • 几种IO读文件性能对比

    文件大小19M,10万行左右的数据。堆内存设置如下:

    一,NIO,无内存溢出,耗时220毫秒左右。缺点是实现起来太复杂,需要自行处理汉字等多字节字符被缓冲区边界截断的问题。

    package myWeb.test.nio;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.nio.ByteBuffer;
    import java.nio.channels.FileChannel;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Date;
    import java.util.List;
    
    /**
     * Benchmark that reads a text file line by line through a {@link FileChannel}.
     *
     * <p>Raw bytes are split on line feeds <em>before</em> they are decoded, so a
     * multi-byte character that happens to be cut by a buffer boundary is never
     * decoded in halves. Lines may end with LF or CRLF; a lone CR is not treated
     * as a line terminator.
     */
    public class NIOReadFile1 {

        /** File read by {@code main} when no path argument is supplied. */
        private static final String DEFAULT_PATH =
                "/Users/yp-tc-m-2777/Desktop/CCB_NET_B2C_JHBZ1012all.txt";

        /** Number of bytes requested from the channel per read. */
        private static final int BUF_SIZE = 30000;

        /** Charset name used to decode every line. */
        private static final String ENCODING = "utf-8";

        private static final byte LF = 10;
        private static final byte CR = 13;
        private static final byte[] NO_BYTES = new byte[0];

        /**
         * Reads the whole file once and prints the elapsed time in milliseconds.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws Exception if the file cannot be opened or closed
         */
        public static void main(String args[]) throws Exception {
            String path = args != null && args.length > 0 ? args[0] : DEFAULT_PATH;
            long begin = System.currentTimeMillis();
            RandomAccessFile file = new RandomAccessFile(new File(path), "r");
            try {
                FileChannel fcin = file.getChannel();
                ByteBuffer rBuffer = ByteBuffer.allocate(BUF_SIZE);
                // The output channel and write buffer are not used by the reader,
                // so nothing is allocated for them inside the timed region.
                readFileByLine(BUF_SIZE, fcin, rBuffer, null, null);
                long end = System.currentTimeMillis();
                System.out.print(end - begin); // elapsed milliseconds
            } finally {
                // Closing the file also closes the channel obtained from it,
                // and runs even when reading fails.
                file.close();
            }
        }

        /**
         * Reads {@code fcin} to its end, decoding every line and discarding it.
         *
         * <p>An {@link IOException} raised while reading is printed to standard
         * error and ends the scan; it is not rethrown.
         *
         * @param bufSize unused; the chunk size is the capacity of {@code rBuffer}
         * @param fcin    channel to read from, starting at its current position
         * @param rBuffer scratch buffer for reads; it is cleared before use
         * @param fcout   unused (writing is disabled); may be {@code null}
         * @param wBuffer unused (writing is disabled); may be {@code null}
         * @throws IllegalArgumentException if {@code rBuffer} has zero capacity
         */
        public static void readFileByLine(int bufSize, FileChannel fcin,
                ByteBuffer rBuffer, FileChannel fcout, ByteBuffer wBuffer) {
            try {
                scanLines(fcin, rBuffer, null);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * Reads {@code fcin} to its end and returns its lines without terminators.
         *
         * @param fcin    channel to read from, starting at its current position
         * @param bufSize number of bytes requested per read; must be positive
         * @return the decoded lines in file order
         * @throws IOException              if reading or decoding fails
         * @throws IllegalArgumentException if {@code bufSize} is not positive
         */
        public static List<String> readLines(FileChannel fcin, int bufSize) throws IOException {
            List<String> lines = new ArrayList<String>();
            scanLines(fcin, ByteBuffer.allocate(bufSize), lines);
            return lines;
        }

        /** Splits the channel content on LF and hands each line to {@link #emit}. */
        private static void scanLines(FileChannel fcin, ByteBuffer rBuffer, List<String> sink)
                throws IOException {
            if (rBuffer.capacity() == 0) {
                // read() would return 0 forever and the loop below would never end.
                throw new IllegalArgumentException("rBuffer must have a non-zero capacity");
            }
            // Bytes of a line whose terminator has not been seen yet.
            byte[] carry = NO_BYTES;
            rBuffer.clear();
            while (fcin.read(rBuffer) != -1) {
                rBuffer.flip();
                byte[] chunk = new byte[rBuffer.remaining()];
                rBuffer.get(chunk);
                rBuffer.clear();

                int lineStart = 0;
                for (int i = 0; i < chunk.length; i++) {
                    if (chunk[i] != LF) {
                        continue;
                    }
                    emit(concat(carry, chunk, lineStart, i), sink);
                    carry = NO_BYTES;
                    lineStart = i + 1;
                }
                // Whatever follows the last LF belongs to a line that continues
                // in the next chunk (or is the unterminated last line).
                carry = concat(carry, chunk, lineStart, chunk.length);
            }
            if (carry.length > 0) {
                emit(carry, sink); // last line without a trailing line feed
            }
        }

        /** Decodes one line, dropping a trailing CR, and stores it when a sink is given. */
        private static void emit(byte[] lineBytes, List<String> sink) throws IOException {
            int length = lineBytes.length;
            if (length > 0 && lineBytes[length - 1] == CR) {
                length--; // CRLF terminator: the CR precedes the LF already removed
            }
            // Decoded even when discarded so the benchmark measures the full cost.
            String line = new String(lineBytes, 0, length, ENCODING);
            if (sink != null) {
                sink.add(line);
            }
        }

        /** Returns {@code head} followed by {@code tail[from..to)}. */
        private static byte[] concat(byte[] head, byte[] tail, int from, int to) {
            byte[] joined = Arrays.copyOf(head, head.length + (to - from));
            System.arraycopy(tail, from, joined, head.length, to - from);
            return joined;
        }

        /**
         * Appends {@code line}, encoded as UTF-8, to the end of {@code fcout}.
         *
         * <p>An {@link IOException} is printed to standard error, not rethrown.
         *
         * @param fcout   channel to append to
         * @param wBuffer unused; the bytes are wrapped in a fresh buffer
         * @param line    text to write
         */
        private static void writeFileByLine(FileChannel fcout, ByteBuffer wBuffer,
                String line) {
            try {
                ByteBuffer data = ByteBuffer.wrap(line.getBytes("UTF-8"));
                long position = fcout.size();
                // A single write may transfer only part of the buffer.
                while (data.hasRemaining()) {
                    position += fcout.write(data, position);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    二,BIO使用BufferedReader,耗时180毫秒左右,竟然是最高效、代码也最简洁的方式。

    package myWeb.test.nio;
    
    import java.io.BufferedInputStream;
    import java.io.BufferedOutputStream;
    import java.io.BufferedReader;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * Benchmark that reads a text file line by line with a {@link BufferedReader}
     * and prints the elapsed time in milliseconds.
     */
    public class OfOldIO1 {

        /** File read by {@code main} when no path argument is supplied. */
        private static final String DEFAULT_PATH =
                "/Users/yp-tc-m-2777/Desktop/CCB_NET_B2C_JHBZ1012all.txt";

        /**
         * Reads every line of the input file, discarding the content.
         *
         * @param args optional; {@code args[0]} overrides the default input path
         * @throws IOException if the file cannot be opened, read or closed
         */
        public static void main(String[] args) throws IOException {
            String path = args != null && args.length > 0 ? args[0] : DEFAULT_PATH;
            long begin = System.currentTimeMillis();
            // Decode explicitly as UTF-8 instead of relying on the platform default.
            BufferedReader read = new BufferedReader(
                    new InputStreamReader(new FileInputStream(new File(path)), "UTF-8"));
            try {
                while (read.readLine() != null) {
                    // Lines are intentionally discarded; only read time is measured.
                }
            } finally {
                // Runs even when readLine() fails, so the file handle is not leaked.
                read.close();
            }
            long end = System.currentTimeMillis();
            System.out.print(end - begin); // elapsed milliseconds
        }
    }

     三,Scanner,惨不忍睹。

    package myWeb.test.nio;
    
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Scanner;
    
    /**
     * Benchmark that walks a text file line by line with a {@link Scanner}
     * (decoding as UTF-8) and prints the elapsed time in milliseconds.
     */
    public class ScannerReadFile {

        /**
         * Scans the hard-coded input file to its end, discarding every line.
         *
         * @param args ignored
         * @throws IOException if the file cannot be opened, if the scanner recorded
         *                     an I/O failure while reading, or if closing fails
         */
        public static void main(String[] args) throws IOException {
            final long startedAt = System.currentTimeMillis();
            FileInputStream source = null;
            Scanner scanner = null;
            try {
                source = new FileInputStream("/Users/yp-tc-m-2777/Desktop/CCB_NET_B2C_JHBZ1012all.txt");
                scanner = new Scanner(source, "UTF-8");
                skipAllLines(scanner);
                // Scanner swallows I/O errors while iterating; surface the recorded one.
                final IOException recorded = scanner.ioException();
                if (recorded != null) {
                    throw recorded;
                }
            } finally {
                release(source, scanner);
            }
            final long finishedAt = System.currentTimeMillis();
            System.out.print(finishedAt - startedAt); // elapsed milliseconds
        }

        /** Advances the scanner past every remaining line without keeping any of them. */
        private static void skipAllLines(Scanner scanner) {
            while (scanner.hasNextLine()) {
                scanner.nextLine();
            }
        }

        /** Closes the stream first and the scanner second, skipping whichever is null. */
        private static void release(FileInputStream source, Scanner scanner) throws IOException {
            if (source != null) {
                source.close();
            }
            if (scanner != null) {
                scanner.close();
            }
        }
    }

    四,Commons IO(LineIterator),性能与NIO类似。

    package myWeb.test.nio;
    
    import java.io.File;
    import java.io.IOException;
    
    import org.apache.commons.io.FileUtils;
    import org.apache.commons.io.LineIterator;
    
    /**
     * Benchmark that iterates over a text file with a Commons IO
     * {@link LineIterator} (decoding as UTF-8) and prints the elapsed time in
     * milliseconds.
     */
    public class CommonIOReadFile {

        /**
         * Iterates over every line of the hard-coded input file, discarding each one.
         *
         * @param args ignored
         * @throws Exception if the iterator cannot be created or reading fails
         */
        public static void main(String[] args) throws Exception {
            final long startedAt = System.currentTimeMillis();
            final File input = new File("/Users/yp-tc-m-2777/Desktop/CCB_NET_B2C_JHBZ1012all.txt");
            final LineIterator lines = FileUtils.lineIterator(input, "UTF-8");
            try {
                // Lines are fetched and dropped; only the read time matters here.
                for (; lines.hasNext(); ) {
                    lines.nextLine();
                }
            } finally {
                LineIterator.closeQuietly(lines);
            }
            final long finishedAt = System.currentTimeMillis();
            System.out.print(finishedAt - startedAt); // elapsed milliseconds
        }
    }

  • 相关阅读:
    高精度
    欢迎来到我的博客!
    1
    POJ 2774 求两个串的最长公共前缀 | 后缀数组
    ural1297 求最长回文子串 | 后缀数组
    洛谷 [SCOI2010]股票交易 | 单调性DP
    BZOJ 1096: [ZJOI2007]仓库建设 | 斜率优化DP
    洛谷 P2906 [USACO08OPEN]牛的街区Cow Neighborhoods | Set+并查集
    BZOJ 1010: [HNOI2008]玩具装箱toy | 单调队列优化DP
    BZOJ 1342: [Baltic2007]Sound静音问题 | 单调队列维护的好题
  • 原文地址:https://www.cnblogs.com/coolgame/p/9055129.html
Copyright © 2011-2022 走看看