zoukankan      html  css  js  c++  java
  • 大文件读写

        import java.io.BufferedInputStream;  
        import java.io.BufferedReader;  
        import java.io.File;  
        import java.io.FileInputStream;  
        import java.io.FileWriter;  
        import java.io.IOException;  
        import java.io.InputStreamReader;  
          
        /**
         * Splits one large text file into consecutive, line-based chunk files
         * named {@code part0.txt}, {@code part1.txt}, ... in a fixed output directory.
         *
         * <p>The input path, the expected line count and the chunk count are
         * hard-coded in {@link #main(String[])}.
         */
        public class FileSplit
        {
            /**
             * Reads the input file sequentially and writes it out chunk by chunk,
             * then prints the elapsed time in milliseconds.
             *
             * @param args unused
             * @throws IOException if the input cannot be read or a chunk cannot be written
             */
            public static void main(String[] args) throws IOException
            {
                long timer = System.currentTimeMillis();
                int bufferSize = 20 * 1024 * 1024; // 20 MB read buffer

                File file = new File("/media/Data/毕业设计/kdd cup/数据/userid_profile.txt");

                int splitNum = 112 - 1; // number of chunk files minus one
                int fileLines = 23669283; // number of lines in the input file
                // Integer division: the loop below runs splitNum + 1 times, so the
                // last chunk file receives the lines left over by the rounding.
                long perSplitLines = fileLines / splitNum;

                // try-with-resources closes the reader (and every writer) even when
                // a read or write fails part-way through.
                try (BufferedReader input = new BufferedReader(
                        new InputStreamReader(new BufferedInputStream(new FileInputStream(file))),
                        bufferSize))
                {
                    for (int i = 0; i <= splitNum; ++i)
                    {
                        // One output file per chunk.
                        try (FileWriter output = new FileWriter("/home/haoqiong/part" + i + ".txt"))
                        {
                            String line;
                            // Copy at most perSplitLines lines; stop early at end of input.
                            for (long lineCounter = 0;
                                 lineCounter < perSplitLines && (line = input.readLine()) != null;
                                 ++lineCounter)
                            {
                                // NOTE(review): the line-terminator literal was broken across two
                                // lines in the published listing; "\n" is the reconstruction.
                                output.append(line).append("\n");
                            }
                        }
                    }
                }
                timer = System.currentTimeMillis() - timer;
                System.out.println("处理时间:" + timer);
            }
        }

    方法2

    void largeFileIO(String inputFile, String outputFile) {
            try {
                BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(inputFile)));
                BufferedReader in = new BufferedReader(new InputStreamReader(bis, "utf-8"), 10 * 1024 * 1024);//10M缓存
                FileWriter fw = new FileWriter(outputFile);
                while (in.ready()) {
                    String line = in.readLine();
                    fw.append(line + " ");
                }
                in.close();
                fw.flush();
                fw.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }

    //NIO 读写

    package com.netty.demo1.vera.demo;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.io.UnsupportedEncodingException;
    import java.nio.ByteBuffer;
    import java.nio.channels.FileChannel;
    import java.nio.charset.Charset;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;
    
    /**
     * Single-method callback that is handed a string.
     * NOTE(review): nothing in the visible code implements or invokes this
     * interface — confirm whether it is still needed.
     */
    interface Callback {
        /**
         * @param line presumably one line of text read from a file — confirm against callers
         */
        void action(String line);
    }
    /**
     * Copies a text file line by line using NIO {@link FileChannel}s.
     *
     * <p>The input is read in fixed-size byte chunks, split into lines on the
     * line-feed byte, decoded as GBK, and each line is appended to the output
     * file encoded as UTF-8.
     */
    public class NIOReadline
    {
        /** Line-feed byte that terminates a line in the input. */
        private static final byte LF = 10;

        /** Carriage-return byte; dropped when it immediately precedes a line feed. */
        private static final byte CR = 13;

        /**
         * Copies a hard-coded input file to a hard-coded output file and prints
         * the start and end timestamps.
         *
         * @param args unused
         * @throws IOException if either file cannot be opened or closed
         */
        public static void main(String[] args) throws IOException {
            int bufSize = 1000000; // number of bytes requested per read
            File fin = new File("C:\\Devs\\abc.txt"); // file to read
            File fout = new File("C:\\Devs\\out.txt"); // file to write
            Date startDate = new Date();
            ByteBuffer rBuffer = ByteBuffer.allocate(bufSize);
            ByteBuffer wBuffer = ByteBuffer.allocateDirect(bufSize);

            // try-with-resources releases both files even if the copy fails.
            try (RandomAccessFile inFile = new RandomAccessFile(fin, "r");
                 RandomAccessFile outFile = new RandomAccessFile(fout, "rws");
                 FileChannel fcin = inFile.getChannel();
                 FileChannel fcout = outFile.getChannel()) {
                readFileByLine(bufSize, fcin, rBuffer, fcout, wBuffer);
                Date endDate = new Date();

                System.out.print(startDate + "|" + endDate); // crude timing output
            }
        }

        /**
         * Reads {@code fcin} to its end, splitting the bytes into lines and
         * passing each line, followed by a line terminator, to
         * {@link #writeFileByLine(FileChannel, ByteBuffer, String)}.
         *
         * <p>Lines are not retained in memory after they have been written.
         * I/O failures are printed to standard error and not propagated.
         *
         * @param bufSize unused; the read size is the capacity of {@code rBuffer}
         * @param fcin    channel to read from
         * @param rBuffer scratch buffer for reads; expected to be empty (position 0) on entry
         * @param fcout   channel the lines are appended to
         * @param wBuffer forwarded to {@code writeFileByLine}
         */
        public static void readFileByLine(int bufSize, FileChannel fcin,
                ByteBuffer rBuffer, FileChannel fcout, ByteBuffer wBuffer) {
            // NOTE(review): this literal was broken across two lines in the published
            // listing; "\n" is the reconstruction.
            String enter = "\n";

            String encode = "GBK";
            try {
                // Fixed-size reads cut lines (and multi-byte characters) at arbitrary
                // points. Bytes after the last line feed of a chunk are carried over
                // and only decoded once the rest of the line has arrived; decoding a
                // partial line would corrupt characters split across two reads.
                byte[] carry = new byte[0];
                while (fcin.read(rBuffer) != -1) {
                    rBuffer.flip(); // switch from filling to draining
                    byte[] chunk = new byte[rBuffer.remaining()];
                    rBuffer.get(chunk);
                    rBuffer.clear(); // ready for the next read

                    int lineStart = 0;
                    for (int i = 0; i < chunk.length; i++) {
                        if (chunk[i] != LF) {
                            continue;
                        }
                        byte[] lineBytes = join(carry, chunk, lineStart, i);
                        carry = new byte[0];

                        // Drop the CR of a CRLF pair. Checking the assembled line
                        // also covers a CR that arrived at the end of the previous chunk.
                        int length = lineBytes.length;
                        if (length > 0 && lineBytes[length - 1] == CR) {
                            length--;
                        }
                        String line = new String(lineBytes, 0, length, encode);
                        writeFileByLine(fcout, wBuffer, line + enter);

                        lineStart = i + 1;
                    }
                    // Whatever follows the last line feed (or the whole chunk, when it
                    // held none) waits for the next read.
                    carry = join(carry, chunk, lineStart, chunk.length);
                }
                if (carry.length > 0) { // last line of the file had no terminator
                    String line = new String(carry, 0, carry.length, encode);
                    writeFileByLine(fcout, wBuffer, line + enter);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /** Returns {@code head} followed by {@code tail[from..to)} as a new array. */
        private static byte[] join(byte[] head, byte[] tail, int from, int to) {
            int tailLength = to - from;
            byte[] joined = new byte[head.length + tailLength];
            System.arraycopy(head, 0, joined, 0, head.length);
            System.arraycopy(tail, from, joined, head.length, tailLength);
            return joined;
        }

        /**
         * Appends {@code line}, encoded as UTF-8, at the current end of {@code fcout}.
         *
         * <p>I/O failures are printed to standard error and not propagated.
         *
         * @param fcout   channel to append to
         * @param wBuffer unused; kept so existing callers keep compiling
         * @param line    text to append, including any line terminator
         */
        public static void writeFileByLine(FileChannel fcout, ByteBuffer wBuffer,
                String line) {
            try {
                ByteBuffer payload = ByteBuffer.wrap(line.getBytes("UTF-8"));
                long position = fcout.size();
                // A single write may transfer fewer bytes than requested, so keep
                // writing until the buffer is drained.
                while (payload.hasRemaining()) {
                    position += fcout.write(payload, position);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
  • 相关阅读:
    MessageDigest简介
    深入入门正则表达式(java)
    JAVA 正则 Pattern 和 Matcher
    理解Servlet过滤器 (javax.servlet.Filter)
    Java 之 I/O 系列 01 ——基础
    finally块中的代码一定会执行吗?
    wait(), notify(),sleep详解
    Java 多线程——基础知识
    集合迭代时对集合进行修改抛ConcurrentModificationException 原因 以及解决方案
    深入理解ServletRequest与ServletResponse
  • 原文地址:https://www.cnblogs.com/MarchThree/p/5890784.html
Copyright © 2011-2022 走看看