zoukankan      html  css  js  c++  java
  • RandomAccessFile读取大文件时效率很低,现进行改进---BufferedRandomAccessFile

      /**
       * Licensed to the Apache Software Foundation (ASF) under one
       * or more contributor license agreements.  See the NOTICE file
       * distributed with this work for additional information
       * regarding copyright ownership.  The ASF licenses this file
       * to you under the Apache License, Version 2.0 (the
       * "License"); you may not use this file except in compliance
       * with the License.  You may obtain a copy of the License at
       *
       *     http://www.apache.org/licenses/LICENSE-2.0
       *
       * Unless required by applicable law or agreed to in writing, software
       * distributed under the License is distributed on an "AS IS" BASIS,
       * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       * See the License for the specific language governing permissions and
       * limitations under the License.
       */
      
      import java.io.File;
      import java.io.FileNotFoundException;
      import java.io.IOException;
      import java.io.RandomAccessFile;
      import java.util.Arrays;
      
      /**
       * A <code>BufferedRandomAccessFile</code> is like a
       * <code>RandomAccessFile</code>, but it uses a private buffer so that most
       * operations do not require a disk access.
       * <P>
       * 
       * Note: The operations on this class are unmonitored. Also, the correct
       * functioning of the <code>RandomAccessFile</code> methods that are not
       * overridden here relies on the implementation of those methods in the
       * superclass.
       */
      
      public final class BufferedRandomAccessFile extends RandomAccessFile
      {
          static final int LogBuffSz_ = 16; // 64K buffer
          public static final int BuffSz_ = (1 << LogBuffSz_);
          static final long BuffMask_ = ~(((long) BuffSz_) - 1L);
      
          /** Path (or name) the file was opened with; returned by {@link #getPath()}. */
          private final String path_;
          
          /*
           * This implementation is based on the buffer implementation in Modula-3's
           * "Rd", "Wr", "RdClass", and "WrClass" interfaces.
           */
          private boolean dirty_; // true iff unflushed bytes exist
          private boolean syncNeeded_; // dirty_ can be cleared by e.g. seek, so track sync separately
          private long curr_; // current position in file
          private long lo_, hi_; // bounds on characters in "buff"
          private byte[] buff_; // local buffer
          private long maxHi_; // this.lo + this.buff.length
          private boolean hitEOF_; // buffer contains last file block?
          private long diskPos_; // disk position
      
          /*
           * To describe the above fields, we introduce the following abstractions for
           * the file "f":
           *
           *   len(f)    the length of the file
           *   curr(f)   the current position in the file
           *   c(f)      the abstract contents of the file
           *   disk(f)   the contents of f's backing disk file
           *   closed(f) true iff the file is closed
           *
           * "curr(f)" is an index in the closed interval [0, len(f)]. "c(f)" is a
           * character sequence of length "len(f)". "c(f)" and "disk(f)" may differ if
           * "c(f)" contains unflushed writes not reflected in "disk(f)". The flush
           * operation has the effect of making "disk(f)" identical to "c(f)".
           *
           * A file is said to be *valid* if the following conditions hold:
           *
           * V1. The "closed" and "curr" fields are correct:
           *
           *     f.closed == closed(f)
           *     f.curr == curr(f)
           *
           * V2. The current position is either contained in the buffer, or just past
           *     the buffer:
           *
           *     f.lo <= f.curr <= f.hi
           *
           * V3. Any (possibly) unflushed characters are stored in "f.buff":
           *
           *     (forall i in [f.lo, f.curr): c(f)[i] == f.buff[i - f.lo])
           *
           * V4. For all characters not covered by V3, c(f) and disk(f) agree:
           *
           *     (forall i in [f.lo, len(f)):
           *         i not in [f.lo, f.curr) => c(f)[i] == disk(f)[i])
           *
           * V5. "f.dirty" is true iff the buffer contains bytes that should be
           *     flushed to the file; by V3 and V4, only part of the buffer can be dirty.
           *
           *     f.dirty == (exists i in [f.lo, f.curr): c(f)[i] != f.buff[i - f.lo])
           *
           * V6. this.maxHi == this.lo + this.buff.length
           *
           * Note that "f.buff" can be "null" in a valid file, since the range of
           * characters in V3 is empty when "f.lo == f.curr".
           *
           * A file is said to be *ready* if the buffer contains the current position,
           * i.e., when:
           *
           * R1. !f.closed && f.buff != null && f.lo <= f.curr && f.curr < f.hi
           *
           * When a file is ready, reading or writing a single byte can be performed
           * by reading or writing the in-memory buffer without performing a disk
           * operation.
           *
           * NOTE(review): seek() accepts positions beyond the end of the file, in
           * which case f.curr > f.hi until the next write; the read methods treat
           * that state as end-of-file.
           */
          
          /**
           * Open a new <code>BufferedRandomAccessFile</code> on <code>file</code>
           * in mode <code>mode</code>, which should be "r" for reading only, or
           * "rw" for reading and writing. The default buffer size
           * ({@link #BuffSz_}) is used.
           */
          public BufferedRandomAccessFile(File file, String mode) throws IOException
          {
              this(file, mode, 0);
          }
          
          /**
           * Open a new <code>BufferedRandomAccessFile</code> on <code>file</code>
           * in mode <code>mode</code>.
           *
           * @param size requested buffer size in bytes; values not larger than
           *             {@link #BuffSz_} fall back to {@link #BuffSz_}
           */
          public BufferedRandomAccessFile(File file, String mode, int size) throws IOException
          {
              super(file, mode);
              path_ = file.getAbsolutePath();
              this.init(size);
          }
          
          /**
           * Open a new <code>BufferedRandomAccessFile</code> on the file named
           * <code>name</code> in mode <code>mode</code>, which should be "r" for
           * reading only, or "rw" for reading and writing. The default buffer size
           * ({@link #BuffSz_}) is used.
           */
          public BufferedRandomAccessFile(String name, String mode) throws IOException
          {
              this(name, mode, 0);
          }
          
          /**
           * Open a new <code>BufferedRandomAccessFile</code> on the file named
           * <code>name</code> in mode <code>mode</code>.
           *
           * @param size requested buffer size in bytes; values not larger than
           *             {@link #BuffSz_} fall back to {@link #BuffSz_}
           */
          public BufferedRandomAccessFile(String name, String mode, int size) throws FileNotFoundException
          {
              super(name, mode);
              path_ = name;
              this.init(size);
          }
          
          /* Allocate the buffer and reset all bookkeeping to "empty buffer at offset 0". */
          private void init(int size)
          {
              this.dirty_ = false;
              this.lo_ = this.curr_ = this.hi_ = 0;
              this.buff_ = (size > BuffSz_) ? new byte[size] : new byte[BuffSz_];
              // V6: maxHi == lo + buff.length (lo is 0 here); the buffer may be larger than BuffSz_
              this.maxHi_ = (long) this.buff_.length;
              this.hitEOF_ = false;
              this.diskPos_ = 0L;
          }
      
          /**
           * @return the absolute path of the <code>File</code>, or the name string,
           *         that was passed to the constructor
           */
          public String getPath()
          {
              return path_;
          }
      
          /**
           * Flush buffered bytes and force the channel to storage, but only if
           * something has been written through this object since the last sync.
           */
          public void sync() throws IOException
          {
              if (syncNeeded_)
              {
                  flush();
                  getChannel().force(true);
                  syncNeeded_ = false;
              }
          }
      
          /**
           * Flush any unwritten bytes, release the buffer and close the underlying
           * file. The file is closed even if the flush fails, so the descriptor is
           * not leaked; the flush failure is still propagated to the caller.
           */
          @Override
          public void close() throws IOException
          {
              try
              {
                  this.flush();
              }
              finally
              {
                  this.buff_ = null;
                  super.close();
              }
          }
          
          /**
           * Flush any bytes in the file's buffer that have not yet been written to
           * disk. If nothing has been written through this object, this method is
           * a no-op.
           */
          public void flush() throws IOException
          {        
              this.flushBuffer();
          }
          
          /* Flush any dirty bytes in the buffer (the range [lo, curr)) to disk. */
          private void flushBuffer() throws IOException
          {   
              if (this.dirty_)
              {
                  if (this.diskPos_ != this.lo_)
                      super.seek(this.lo_);
                  int len = (int) (this.curr_ - this.lo_);
                  super.write(this.buff_, 0, len);
                  this.diskPos_ = this.curr_;             
                  this.dirty_ = false;
              }
          }
          
          /*
           * Read at most "this.buff.length" bytes into "this.buff", returning the
           * number of bytes read. If the return result is less than
           * "this.buff.length", then EOF was read, and "hitEOF" is set accordingly.
           */
          private int fillBuffer() throws IOException
          {
              int cnt = 0;
              int rem = this.buff_.length;
              while (rem > 0)
              {
                  int n = super.read(this.buff_, cnt, rem);
                  if (n < 0)
                      break;
                  cnt += n;
                  rem -= n;
              }
              // A short read means the buffer now holds the last block of the file.
              this.hitEOF_ = (cnt < this.buff_.length);
              if (this.hitEOF_)
              {
                  // make sure buffer that wasn't read is initialized with -1
                  Arrays.fill(this.buff_, cnt, this.buff_.length, (byte) 0xff);
              }
              this.diskPos_ += cnt;
              return cnt;
          }
          
          /**
           * Position <code>this.curr</code> at <code>pos</code>. If
           * <code>pos</code> does not fall in the current buffer, the current
           * buffer is flushed and the block containing <code>pos</code> (aligned
           * to a {@link #BuffSz_} boundary) is loaded.
           * <p>
           * On exit <code>this.curr &gt;= this.hi</code> iff <code>pos</code> is at
           * or past the end of the data that could be loaded from the file.
           */
          @Override
          public void seek(long pos) throws IOException
          {
              if (pos >= this.hi_ || pos < this.lo_)
              {
                  // seeking outside of current buffer -- flush and read             
                  this.flushBuffer();
                  this.lo_ = pos & BuffMask_; // start at BuffSz boundary
                  this.maxHi_ = this.lo_ + (long) this.buff_.length;
                  if (this.diskPos_ != this.lo_)
                  {
                      super.seek(this.lo_);
                      this.diskPos_ = this.lo_;
                  }
                  int n = this.fillBuffer();
                  this.hi_ = this.lo_ + (long) n;
              }
              else
              {
                  // seeking inside current buffer -- no read required
                  if (pos < this.curr_)
                  {
                      // if seeking backwards, we must flush to maintain V4
                      this.flushBuffer();
                  }
              }
              this.curr_ = pos;
          }
          
          /** @return the current logical position, which may be ahead of the disk position */
          @Override
          public long getFilePointer()
          {
              return this.curr_;
          }
      
          /**
           * @return the logical length of the file: the on-disk length, extended to
           *         the current position when unflushed writes reach past it
           */
          @Override
          public long length() throws IOException
          {
              long diskLength = super.length();
              // Only unflushed writes can make the file logically longer than it is on
              // disk; merely seeking past EOF must not change the reported length.
              return this.dirty_ ? Math.max(this.curr_, diskLength) : diskLength;
          }
      
          /**
           * Read one byte from the current position.
           *
           * @return the byte as an int in the range 0..255, or -1 at end of file
           */
          @Override
          public int read() throws IOException
          {
              if (this.curr_ >= this.hi_)
              {
                  // test for EOF
                  if (this.hitEOF_)
                      return -1;
                  
                  // slow path -- read another buffer
                  this.seek(this.curr_);
                  // ">=" rather than "==": the position may lie beyond the end of the file
                  if (this.curr_ >= this.hi_)
                      return -1;
              }
              byte res = this.buff_[(int) (this.curr_ - this.lo_)];
              this.curr_++;
              return ((int) res) & 0xFF; // convert byte -> int
          }
          
          /** Equivalent to <code>read(b, 0, b.length)</code>. */
          @Override
          public int read(byte[] b) throws IOException
          {
              return this.read(b, 0, b.length);
          }
          
          /**
           * Read up to <code>len</code> bytes into <code>b</code> starting at
           * <code>off</code>. At most the remainder of the current buffer is
           * returned per call, so fewer than <code>len</code> bytes may be read
           * even when more data is available.
           *
           * @return the number of bytes read, 0 if <code>len</code> is 0, or -1 at
           *         end of file
           */
          @Override
          public int read(byte[] b, int off, int len) throws IOException
          {
              // A zero-length read must not report EOF: the inherited readFully()
              // (and therefore readUTF() of an empty string) turns -1 into EOFException.
              if (len == 0)
                  return 0;
              if (this.curr_ >= this.hi_)
              {
                  // test for EOF
                  if (this.hitEOF_)
                      return -1;
                  
                  // slow path -- read another buffer
                  this.seek(this.curr_);
                  // ">=" rather than "==": the position may lie beyond the end of the file
                  if (this.curr_ >= this.hi_)
                      return -1;
              }
              len = Math.min(len, (int) (this.hi_ - this.curr_));
              int buffOff = (int) (this.curr_ - this.lo_);
              System.arraycopy(this.buff_, buffOff, b, off, len);
              this.curr_ += len;
              return len;
          }
          
          /**
           * Write the low-order byte of <code>b</code> at the current position.
           * The byte is stored in the buffer and reaches the disk on a later flush.
           */
          @Override
          public void write(int b) throws IOException
          {
              if (this.curr_ >= this.hi_)
              {
                  if (this.hitEOF_ && this.hi_ < this.maxHi_)
                  {
                      // at EOF -- bump "hi"
                      this.hi_++;
                  }
                  else
                  {
                      // slow path -- write current buffer; read next one
                      this.seek(this.curr_);
                      if (this.curr_ == this.hi_)
                      {
                          // appending to EOF -- bump "hi"
                          this.hi_++;
                      }
                  }
              }
              this.buff_[(int) (this.curr_ - this.lo_)] = (byte) b;
              this.curr_++;
              this.dirty_ = true;
              syncNeeded_ = true;
          }
          
          /** Equivalent to <code>write(b, 0, b.length)</code>. */
          @Override
          public void write(byte[] b) throws IOException
          {
              this.write(b, 0, b.length);
          }
          
          /**
           * Write <code>len</code> bytes from <code>b</code>, starting at
           * <code>off</code>, at the current position. The data is copied into the
           * buffer one buffer-sized piece at a time.
           */
          @Override
          public void write(byte[] b, int off, int len) throws IOException
          {        
              while (len > 0)
              {              
                  int n = this.writeAtMost(b, off, len);
                  off += n;
                  len -= n;
                  this.dirty_ = true;
                  syncNeeded_ = true;
              }
          }
          
          /*
           * Write at most "len" bytes from "b" starting at position "off" into the
           * buffer, and return the number of bytes written (limited by the space
           * left in the current buffer).
           */
          private int writeAtMost(byte[] b, int off, int len) throws IOException
          {        
              if (this.curr_ >= this.hi_)
              {
                  if (this.hitEOF_ && this.hi_ < this.maxHi_)
                  {
                      // at EOF -- bump "hi"
                      this.hi_ = this.maxHi_;
                  }
                  else
                  {                                
                      // slow path -- write current buffer; read next one                
                      this.seek(this.curr_);
                      if (this.curr_ == this.hi_)
                      {
                          // appending to EOF -- bump "hi"
                          this.hi_ = this.maxHi_;
                      }
                  }
              }
              len = Math.min(len, (int) (this.hi_ - this.curr_));
              int buffOff = (int) (this.curr_ - this.lo_);
              System.arraycopy(b, off, this.buff_, buffOff, len);
              this.curr_ += len;
              return len;
          }
      }

    仿照:http://www.ibm.com/developerworks/cn/java/l-javaio/

     转载 : http://blog.csdn.net/silentbalanceyh/article/details/5252285

  • 相关阅读:
    MySql cmd下的学习笔记 —— 引擎和事务(engine,transaction)
    MySql cmd下的学习笔记 —— 有关视图的操作(algorithm)
    MySql cmd下的学习笔记 —— 有关视图的操作(建立表)
    MySql cmd下的学习笔记 —— 有关常用函数的介绍(数学函数,聚合函数等等)
    MySql cmd下的学习笔记 —— 有关多表查询的操作(多表查询练习题及union操作)
    MySql 在cmd下的学习笔记 —— 有关多表查询的操作(内连接,外连接,交叉连接)
    MySql cmd下的学习笔记 —— 有关子查询的操作(where型,from型,exists型子查询)
    MySql cmd下的学习笔记 —— 有关select的操作(order by,limit)
    剑指Offer--第21题 调整数组顺序使奇数位于偶数前面;
    剑指Offer--和为s的连续正数序列
  • 原文地址:https://www.cnblogs.com/622698abc/p/3265584.html
Copyright © 2011-2022 走看看