zoukankan      html  css  js  c++  java
  • java读写文件,读超大文件

    一直在处理爬虫,经常能遇到读写文件的操作,很多时候都是读写超大文件,记录如下:
    
    一、读文件
    
        import java.io.BufferedOutputStream;
    
        import  java.io.BufferedReader;
    
        import java.io.File;
    
        import java.io.FileOutputStream;
    
       import java.io.FileReader;
    
       import java.io.FileWriter;
    
       import java.io.IOException;
    
       import java.io.OutputStreamWriter;
    
         /**
          * Reads a text file line by line using the platform default charset.
          *
          * <p>Any {@link IOException} raised while opening, reading or closing the
          * file is printed to standard error and not rethrown.
          *
          * @param filename path of the file to read
          */
         public void ReadFile(String filename) {
             File file = new File(filename);

             // try-with-resources closes the reader on every exit path, so no manual
             // close() or nested finally block is needed.
             // For large files a bigger buffer can be supplied, e.g.
             // new BufferedReader(new FileReader(file), 5 * 1024 * 1024).
             try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
                 String tempString;
                 while ((tempString = reader.readLine()) != null) {
                     // process the current line here...
                 }
             } catch (IOException e) {
                 e.printStackTrace();
             }
         }
    
    二、写文件
    
        import java.io.BufferedOutputStream;
    
        import  java.io.BufferedReader;
    
       import  java.io.BufferedWriter;
    
        import java.io.File;
    
        import java.io.FileOutputStream;
    
       import java.io.FileReader;
    
       import java.io.FileWriter;
    
       import java.io.IOException;
    
       import java.io.OutputStreamWriter;
    
     
    
        public void method1(String file,String content){
    
         BufferedWriter out=null;
    
         try{
    
               out=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file,true)));  //追加的方法
    
              out.write(content);
    
              out.write("
    ");
    
       }catch(IOException e){
    
                 e.printStackTrace();
    
          }finally{
    
         try{
    
        out.close();
    
      }catch(IOException e){
    
                 e.printStackTrace();
    
          }
    
    }
    
    }
    
    /**
     * Recreates the target file as an empty file and appends one sample line to
     * it through {@code method1}.
     *
     * <p>NOTE(review): this method is not {@code static}, so the JVM will not use
     * it as a program entry point; it has to be invoked on an instance.
     *
     * @param args unused
     * @throws IOException if the existing file cannot be deleted or the new file
     *                     cannot be created
     */
    public void main(String[] args) throws IOException {
        // NOTE(review): "D:a.txt" is a drive-relative path; the listing may have
        // lost a backslash escape ("D:\\a.txt") - confirm the intended location.
        String filename = "D:a.txt";
        File f = new File(filename);

        // method1 appends, so remove any previous file to start from empty content.
        if (f.exists() && !f.delete()) {
            throw new IOException("Cannot delete existing file: " + f);
        }
        f.createNewFile();

        String ss = "sssssss";
        method1(filename, ss);
    }
    
    ===========================华丽丽的分隔符=============================
    
    在实践中发现,上面的方法最多只能处理 10M 以内的数据,因此补充了下面的内容。
    
    想要真正处理上百 M、甚至上 G 的文件,就要用到 Java 的 NIO 包(java.nio):
    
    下面的代码转自【http://www.oschina.net/code/snippet_54100_7938】
    
    import java.io.BufferedReader;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.nio.ByteBuffer;
    import java.nio.channels.FileChannel;
    
    
    /**
     * Demonstrates copying a large text file line by line with NIO file channels.
     *
     * <p>Text is decoded and encoded with the platform default charset. Line
     * boundaries are found by scanning the raw bytes for LF (0x0A), so the input
     * must use an encoding in which that byte value only ever represents a line
     * feed (for example ASCII, UTF-8 or GBK, but not UTF-16).
     */
    public class TestNio {

        /**
         * Copies {@code usagetracking.log} to {@code usagetracking2.log} line by
         * line. Output is appended if the target file already exists.
         *
         * @param args unused
         * @throws Exception if either file cannot be opened, synced or closed
         */
        public static void main(String args[]) throws Exception {
            // A tiny buffer means one system call per few bytes; 64 KiB keeps the
            // number of reads low without using noticeable memory.
            int bufSize = 64 * 1024;

            // Backslashes must be escaped inside Java string literals.
            File fin = new File("D:\\workspace\\test\\usagetracking.log");
            File fout = new File("D:\\workspace\\test\\usagetracking2.log");

            // try-with-resources closes both files and channels even on failure.
            // "rw" plus one force() at the end replaces "rws", which synchronously
            // flushed to the device on every single line written.
            try (RandomAccessFile source = new RandomAccessFile(fin, "r");
                 RandomAccessFile target = new RandomAccessFile(fout, "rw");
                 FileChannel fcin = source.getChannel();
                 FileChannel fcout = target.getChannel()) {
                ByteBuffer rBuffer = ByteBuffer.allocate(bufSize);
                ByteBuffer wBuffer = ByteBuffer.allocateDirect(bufSize);

                readFileByLine(bufSize, fcin, rBuffer, fcout, wBuffer);
                fcout.force(true);
            }

            System.out.print("OK!!!");
        }

        /**
         * Reads {@code fcin} to its end and appends every line to {@code fcout}.
         *
         * <p>Bytes are accumulated until a complete line is available and only
         * then decoded, so multi-byte characters and line terminators that
         * straddle two reads are handled correctly. Each line is written back
         * with the terminator it had in the input (CRLF or LF); a final line
         * without a terminator is written as is.
         *
         * <p>An {@link IOException} while reading is printed to standard error and
         * ends the copy.
         *
         * @param bufSize retained for backward compatibility; the chunk size is
         *                taken from {@code rBuffer.capacity()}
         * @param fcin    channel to read from, starting at its current position
         * @param rBuffer scratch buffer for reads; it is cleared before use
         * @param fcout   channel that receives the lines
         * @param wBuffer passed through to {@link #writeFileByLine}
         * @throws IllegalArgumentException if {@code rBuffer} has zero capacity
         */
        public static void readFileByLine(int bufSize, FileChannel fcin, ByteBuffer rBuffer,
                FileChannel fcout, ByteBuffer wBuffer) {
            if (rBuffer.capacity() == 0) {
                // A zero-capacity buffer would make read() return 0 forever.
                throw new IllegalArgumentException("rBuffer must have a non-zero capacity");
            }

            byte[] chunk = new byte[rBuffer.capacity()];
            // Bytes of the current, not yet terminated line carried across reads.
            ByteArrayOutputStream pending = new ByteArrayOutputStream();

            try {
                rBuffer.clear();
                while (fcin.read(rBuffer) != -1) {
                    rBuffer.flip();
                    int length = rBuffer.remaining();
                    rBuffer.get(chunk, 0, length);
                    rBuffer.clear();

                    int lineStart = 0;
                    for (int i = 0; i < length; i++) {
                        if (chunk[i] == '\n') {
                            pending.write(chunk, lineStart, i - lineStart);
                            emitLine(pending, true, fcout, wBuffer);
                            lineStart = i + 1;
                        }
                    }
                    // Keep the unterminated tail for the next read.
                    pending.write(chunk, lineStart, length - lineStart);
                }

                // The last line of a file often has no terminator; do not drop it.
                if (pending.size() > 0) {
                    emitLine(pending, false, fcout, wBuffer);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /** Decodes the pending bytes as one line, writes it out and resets the accumulator. */
        private static void emitLine(ByteArrayOutputStream pending, boolean terminated,
                FileChannel fcout, ByteBuffer wBuffer) {
            byte[] raw = pending.toByteArray();
            pending.reset();

            int length = raw.length;
            String terminator = "";
            if (terminated) {
                if (length > 0 && raw[length - 1] == '\r') {
                    length--;
                    terminator = "\r\n";
                } else {
                    terminator = "\n";
                }
            }

            // 'line' holds the text without its terminator; per-line processing
            // would go here before the line is written out.
            String line = new String(raw, 0, length);
            writeFileByLine(fcout, wBuffer, line + terminator);
        }

        /**
         * Appends {@code line}, encoded with the platform default charset, to the
         * end of {@code fcout}. Nothing is added to the text; callers supply any
         * line terminator themselves.
         *
         * <p>An {@link IOException} is printed to standard error and not rethrown.
         *
         * @param fcout   channel to append to
         * @param wBuffer unused; retained for backward compatibility (the bytes
         *                are wrapped directly instead of being copied)
         * @param line    text to append
         */
        public static void writeFileByLine(FileChannel fcout, ByteBuffer wBuffer, String line) {
            try {
                ByteBuffer data = ByteBuffer.wrap(line.getBytes());
                long position = fcout.size();
                // A single write() may transfer only part of the buffer.
                while (data.hasRemaining()) {
                    position += fcout.write(data, position);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
    
     
  • 相关阅读:
    C#基于Socket的简单聊天室实践
    进程及进程间通信
    一次千万级别的SQL查询简单优化体验
    Attribute和Property
    解析Resources.arsc
    数据挖掘十大算法之决策树详解(2)
    数据挖掘十大算法之决策树详解(1)
    线性回归浅谈(Linear Regression)
    贝叶斯推断及其互联网应用(三):拼写检查
    贝叶斯推断及其互联网应用(二):过滤垃圾邮件
  • 原文地址:https://www.cnblogs.com/wh-king/p/4324699.html
Copyright © 2011-2022 走看看