zoukankan      html  css  js  c++  java
  • lzw压缩算法

    using System;
    using System.IO;

    namespace Gif.Components
    {
        /// <summary>
        /// GIF-flavoured LZW compressor.  Reads 8-bit palette indices from a pixel
        /// array and writes the GIF "table based image data" section (initial code
        /// size byte, length-prefixed data sub-blocks, block terminator) to a stream.
        /// </summary>
        /// <remarks>
        /// Ported from GIFCOMPR.C (GIF modifications by David Rowley), which is in
        /// turn based on compress.c by Spencer W. Thomas, Jim McKie, Steve Davies,
        /// Ken Turkowski, James A. Woods and Joe Orost.
        ///
        /// Algorithm: open addressing with double hashing (no chaining) on the
        /// (prefix code, next character) pair, using an exclusive-or first probe and
        /// a relatively-prime secondary probe.  When the code table is full a CLEAR
        /// code is emitted and the table is rebuilt from the initial code width.
        /// </remarks>
        public class LZWEncoder
        {
            // Sentinel returned by NextPixel once every pixel has been consumed.
            private const int EOF = -1;

            // Maximum number of bits per emitted code.
            private const int BITS = 12;

            // Hash table size; with at most 4096 codes this gives ~80% occupancy.
            private const int HSIZE = 5003;

            // masks[k] keeps the low k bits of the bit accumulator.
            private static readonly int[] masks =
            {
                0x0000, 0x0001, 0x0003, 0x0007,
                0x000F, 0x001F, 0x003F, 0x007F,
                0x00FF, 0x01FF, 0x03FF, 0x07FF,
                0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
                0xFFFF
            };

            private int imgW, imgH;
            private byte[] pixAry;
            private int initCodeSize;
            private int remaining;  // pixels still to be read by NextPixel
            private int curPixel;   // index of the next pixel in pixAry

            private int n_bits;                     // current number of bits per code
            private int maxbits = BITS;             // upper limit for n_bits
            private int maxcode;                    // largest code representable with n_bits
            private int maxmaxcode = 1 << BITS;     // should NEVER be generated as a code

            private int[] htab = new int[HSIZE];    // hash slots: packed (char, prefix) key, -1 = empty
            private int[] codetab = new int[HSIZE]; // code assigned to the key stored in the same slot

            private int hsize = HSIZE;              // number of slots actually used

            private int free_ent = 0;               // first unused code

            // Set by ClearTable so that the next Output call drops back to the
            // initial code width after the CLEAR code has been written.
            private bool clear_flg = false;

            private int g_init_bits;                // code width used at start and after each CLEAR

            private int ClearCode;
            private int EOFCode;

            // Bit accumulator: codes are packed least-significant-bit first and
            // drained one byte at a time.
            private int cur_accum = 0;
            private int cur_bits = 0;

            // Number of bytes currently buffered in the pending data sub-block.
            private int a_count;

            // Storage for the pending data sub-block.
            private byte[] accum = new byte[256];

            /// <summary>
            /// Creates an encoder for one image.
            /// </summary>
            /// <param name="width">Image width in pixels.</param>
            /// <param name="height">Image height in pixels.</param>
            /// <param name="pixels">Palette indices, one byte per pixel.</param>
            /// <param name="color_depth">Bits per pixel; values below 2 are raised to 2.</param>
            /// <exception cref="ArgumentNullException"><paramref name="pixels"/> is null.</exception>
            public LZWEncoder(int width, int height, byte[] pixels, int color_depth)
            {
                if (pixels == null)
                    throw new ArgumentNullException("pixels");

                imgW = width;
                imgH = height;
                pixAry = pixels;
                initCodeSize = Math.Max(2, color_depth);
            }

            /// <summary>
            /// Compresses the image and writes it to <paramref name="os"/>: the initial
            /// code size byte, the compressed data sub-blocks and a zero block terminator.
            /// May be called more than once on the same instance.
            /// </summary>
            /// <param name="os">Destination stream.</param>
            /// <exception cref="ArgumentNullException"><paramref name="os"/> is null.</exception>
            public void Encode(Stream os)
            {
                if (os == null)
                    throw new ArgumentNullException("os");

                os.WriteByte(Convert.ToByte(initCodeSize)); // write "initial code size" byte

                // Reset the pixel cursor so the whole image is read from the start.
                remaining = imgW * imgH;
                curPixel = 0;

                // Codes start one bit wider than the pixel data to make room for
                // the CLEAR and EOF codes.
                Compress(initCodeSize + 1, os);

                os.WriteByte(0); // write block terminator
            }

            // Appends one byte to the pending sub-block and writes the sub-block
            // out once it holds 254 bytes.
            private void Add(byte c, Stream outs)
            {
                accum[a_count++] = c;
                if (a_count >= 254)
                    Flush(outs);
            }

            // Empties the code table, restarts code numbering and emits a CLEAR
            // code so the decoder does the same.
            private void ClearTable(Stream outs)
            {
                ResetCodeTable(hsize);
                free_ent = ClearCode + 2;
                clear_flg = true;

                Output(ClearCode, outs);
            }

            // Marks the first `hsize` hash slots as empty (-1).
            private void ResetCodeTable(int hsize)
            {
                for (int i = 0; i < hsize; ++i)
                    htab[i] = -1;
            }

            // Runs the LZW compression loop over all pixels, writing codes that
            // start at `init_bits` bits wide.
            private void Compress(int init_bits, Stream outs)
            {
                int fcode;
                int i;
                int c;
                int ent;
                int disp;
                int hsize_reg;
                int hshift;

                g_init_bits = init_bits;

                // Start every run from a clean bit accumulator.  The EOF flush of
                // a previous run can leave cur_bits negative, which would index
                // outside `masks` on the next Output call.
                cur_accum = 0;
                cur_bits = 0;

                clear_flg = false;
                n_bits = g_init_bits;
                maxcode = MaxCode(n_bits);

                // CLEAR is the first value past the pixel alphabet, EOF follows
                // it, and dictionary codes start right after EOF.
                ClearCode = 1 << (init_bits - 1);
                EOFCode = ClearCode + 1;
                free_ent = ClearCode + 2;

                a_count = 0; // clear packet

                // The first pixel becomes the initial prefix.
                ent = NextPixel();
                bool hasPixels = ent != EOF;

                hshift = 0;
                for (fcode = hsize; fcode < 65536; fcode *= 2)
                    ++hshift;
                hshift = 8 - hshift; // set hash code range bound

                hsize_reg = hsize;
                ResetCodeTable(hsize_reg); // clear hash table

                Output(ClearCode, outs);

                while ((c = NextPixel()) != EOF)
                {
                    // Key for the string "prefix ent followed by character c".
                    fcode = (c << maxbits) + ent;
                    i = (c << hshift) ^ ent; // xor hashing

                    bool found = false;
                    if (htab[i] == fcode)
                    {
                        found = true;
                    }
                    else if (htab[i] >= 0) // non-empty slot holding another key
                    {
                        // secondary hash (after G. Knott)
                        disp = (i == 0) ? 1 : hsize_reg - i;
                        do
                        {
                            i -= disp;
                            if (i < 0)
                                i += hsize_reg;

                            if (htab[i] == fcode)
                            {
                                found = true;
                                break;
                            }
                        } while (htab[i] >= 0);
                    }

                    if (found)
                    {
                        // Known string: extend the current prefix and keep reading.
                        ent = codetab[i];
                        continue;
                    }

                    // Unknown string: emit the prefix, restart from c, and record
                    // the new string in the empty slot the probe stopped on.
                    Output(ent, outs);
                    ent = c;
                    if (free_ent < maxmaxcode)
                    {
                        codetab[i] = free_ent++; // code -> hashtable
                        htab[i] = fcode;
                    }
                    else
                    {
                        // All codes are in use: start a fresh dictionary.
                        ClearTable(outs);
                    }
                }

                // Put out the final code.  An empty image has no prefix to emit,
                // so only the EOF code follows the CLEAR code.
                if (hasPixels)
                    Output(ent, outs);
                Output(EOFCode, outs);
            }

            // Writes the pending sub-block (length byte followed by data) to the
            // stream and empties the buffer.  Does nothing if the buffer is empty.
            private void Flush(Stream outs)
            {
                if (a_count > 0)
                {
                    outs.WriteByte(Convert.ToByte(a_count));
                    outs.Write(accum, 0, a_count);
                    a_count = 0;
                }
            }

            // Returns the largest value representable in n_bits bits,
            // e.g. n_bits = 8 gives 255.
            private int MaxCode(int n_bits)
            {
                return (1 << n_bits) - 1;
            }

            // Returns the next pixel as a value in 0..255, or EOF once
            // width * height pixels have been consumed.  If the pixel array is
            // shorter than width * height the missing pixels read as 0xff.
            private int NextPixel()
            {
                if (remaining <= 0)
                    return EOF;

                --remaining;

                // Every index up to and including the last element is valid.
                if (curPixel < pixAry.Length)
                    return pixAry[curPixel++] & 0xff;

                return 0xff;
            }

            // Appends `code` (n_bits wide) to the bit accumulator, drains whole
            // bytes into the pending sub-block, adjusts the code width for the
            // next code, and on the EOF code flushes everything that is left.
            private void Output(int code, Stream outs)
            {
                // Keep only the bits that have not been written out yet.
                cur_accum &= masks[cur_bits];

                if (cur_bits > 0)
                    cur_accum |= (code << cur_bits);
                else
                    cur_accum = code;

                cur_bits += n_bits;

                while (cur_bits >= 8)
                {
                    Add((byte)(cur_accum & 0xff), outs);
                    cur_accum >>= 8;
                    cur_bits -= 8;
                }

                // If the next entry is going to be too big for the code size,
                // then increase it, if possible.
                if (free_ent > maxcode || clear_flg)
                {
                    if (clear_flg)
                    {
                        // A CLEAR code was just written: back to the initial width.
                        n_bits = g_init_bits;
                        maxcode = MaxCode(n_bits);
                        clear_flg = false;
                    }
                    else
                    {
                        ++n_bits;
                        if (n_bits == maxbits)
                            maxcode = maxmaxcode;
                        else
                            maxcode = MaxCode(n_bits);
                    }
                }

                if (code == EOFCode)
                {
                    // At EOF, write the rest of the buffer.
                    while (cur_bits > 0)
                    {
                        Add((byte)(cur_accum & 0xff), outs);
                        cur_accum >>= 8;
                        cur_bits -= 8;
                    }

                    Flush(outs);
                }
            }
        }
    }

  • 相关阅读:
    MongoDB Schema Design
    WinDBG中的poi是做什么用的?
    如何在Visual Studio中运行和调试汇编代码?
    [翻译图书] 未完工 Moving Applications to the Cloud on the Microsoft Windows Azure Platform 4
    在Word中生成随机的样本文本
    Quiz Win32内存表示与数值大小
    rep stos dword ptr es:[edi] 是做什么的?
    Windows Azure中虚拟机无法启动, 报错RoleStateUnknown的解决方案
    COM基础介绍
    64位的dump里如何寻找第一个到第四个参数?
  • 原文地址:https://www.cnblogs.com/jillzhang/p/531631.html
Copyright © 2011-2022 走看看