zoukankan      html  css  js  c++  java
  • 基于二叉树和双向链表实现限制长度的最优Huffman编码

    该代码采用二叉树结合双向链表实现了限制长度的最优Huffman编码,本文代码中的权重全部采用整数值表示。http://pan.baidu.com/s/1mgHn8lq
    算法原理详见:A fast algorithm for optimal length-limited Huffman codes.pdf
    示例:符号ABCDE的权重分别为10,6,2,1,1
       不限制长度的最优Huffman编码为A:0,B:10,C:110,D:1110,E:1111,平均码长为1.8bits/symbol;
       限制长度3的最优Huffman编码为  A:0,B:100,C:101,D:110,E:111,  平均码长为2.0bits/symbol;
    限制长度最优Huffman编码的实现代码如下:
    //Reference:A fast algorithm for optimal length-limited Huffman codes.pdf,http://pan.baidu.com/s/1o6E19Bs
    //author:by Pan Yumin.2014-06-18
    //with the method of BinaryTree and linked-list
    #include <stdio.h>
    #include <memory.h>
    #include <malloc.h>
    
    #define  MaxSymbols 256	//capacity of the fixed-size per-symbol tables (HuffCode, SortIndex, and Freq in main)
    #define  MaxHuffLen	16	//code length limit in bits; also the number of item levels created per symbol
    
    typedef unsigned char boolean;	//truth value, holds FALSE or TRUE
    #ifndef FALSE			//in case these macros already exist
    #define FALSE	0		//values of boolean
    #endif
    
    #ifndef TRUE			//same guard as for FALSE
    #define TRUE	1
    #endif
    
    /*
     * One item of the package-merge work list.
     *
     * A node is either a leaf (left == right == NULL) standing for one symbol at
     * one level, or a package whose two children are the nodes it combined.
     * Nodes that are currently in the work list are chained through prev/next.
     *
     * The struct tag no longer starts with a double underscore, because such
     * identifiers are reserved for the implementation; the typedef name is
     * unchanged.
     */
    typedef struct Node{
    	int width;	//power-of-two width of the item; a package has twice the width of its children
    	int weight;	//symbol weight for a leaf, sum of both children for a package
    	int index;	//leaf only: position of the symbol in the sorted weight array
    	int depth;	//leaf only: level the leaf was created for, 0 is the widest level
    
    	struct Node *prev;	//double linked list
    	struct Node *next;	//double linked list
    	struct Node *left;	//left child
    	struct Node *right;	//right child
    }Node;
    
    /*
     * One row of the generated code table.
     *
     * The struct tag no longer starts with a double underscore, because such
     * identifiers are reserved for the implementation; the typedef name is
     * unchanged.
     */
    typedef struct HuffTable{
    	unsigned int index;	//index the symbol had in the weight array before sorting
    	unsigned int len;	//code length in bits
    	unsigned int code;	//code value held in the low len bits, printed most significant bit first
    }HuffTable;
    
    //Test memory leak
    /*int g_malloc = 0,g_free = 0;
    
    void* my_malloc(int size){
    	g_malloc++;
    	return malloc(size);
    }
    void my_free(void *ptr){
    	if(ptr){
    		g_free++;
    		free(ptr);
    		ptr = NULL;
    	}
    }
    #define malloc my_malloc
    #define free my_free*/
    
    /*
     * Return the smallest term of the dyadic (binary) expansion of X, that is
     * the value of its lowest set bit: 12 (1100b) -> 4, 7 -> 1.
     *
     * X: value to decompose.
     * Returns 0 when X is 0, which has no set bit.  The previous shift loop
     * never terminated for that input.
     */
    int GetSmallestTerm(int X)
    {
    	//In unsigned arithmetic, x & (0 - x) isolates the lowest set bit and
    	//yields 0 for x == 0, with no loop and no signed shift.
    	unsigned int bits = (unsigned int)X;
    
    	return (int)(bits & (0u - bits));
    }
    /*
     * Free the tree rooted at head and record the fate of every leaf in Flag.
     *
     * head:     root of the (sub)tree to release; must not be NULL.
     * Flag:     byte table indexed by depth*Symbols+index of a leaf.
     * Symbols:  row length of Flag.
     * isDelete: when true each leaf's entry is set to 0, otherwise to 1.
     */
    void deleteNode(Node *head,unsigned char *Flag,int Symbols,boolean isDelete)
    {
    	Node *lhs = head->left;
    	Node *rhs = head->right;
    
    	if(lhs == NULL && rhs == NULL){
    		//a leaf: only leaves carry a meaningful depth/index pair
    		Flag[head->depth*Symbols+head->index] = isDelete ? 0 : 1;
    	}else{
    		if(lhs != NULL)
    			deleteNode(lhs,Flag,Symbols,isDelete);
    		if(rhs != NULL)
    			deleteNode(rhs,Flag,Symbols,isDelete);
    	}
    	free(head);
    }
    
    /*
     * One package-and-merge step on the item list.
     *
     * Walking backwards from *tail, pairs of adjacent nodes are combined into
     * package nodes of width 2*minWidth as long as the node in front of the tail
     * has width minWidth.  A node of width minWidth left over at the tail is
     * released with deleteNode(...,TRUE).  The packages are then merged by weight
     * into the run of 2*minWidth nodes at the end of the list.
     *
     * head:     list head; it is compared against but never packaged.
     * tail:     in/out, points to the last node of the list; updated on return.
     * minWidth: width of the nodes to package; the visible caller passes the
     *           width of the current tail.
     * Flag, Symbols: passed through to deleteNode.
     *
     * NOTE(review): the malloc results are used without a NULL check.
     */
    void Package_Merge(Node *head,Node **tail,int minWidth,unsigned char * Flag,int Symbols)
    {
    	Node *tmp = NULL,*node_1 = NULL,*node_2 = NULL;
    	Node *node_P_head = NULL,*node_P_tail = NULL;		//package list: node_P_tail is an empty end marker, node_P_head is its first real package
    	Node *node_head = head;								//will become the node just before the run of 2*minWidth nodes
    	//package
    	node_P_tail = (Node *)malloc(sizeof(Node));
    	memset(node_P_tail,0,sizeof(Node));
    
    	node_2 = node_P_tail;	node_1 = (*tail)->prev;
    	//*tail moves back by two nodes per iteration, so the update expression re-reads (*tail)->prev
    	for(;node_1 != NULL && node_1 != head; node_1=(*tail)->prev){
    		if(node_1->width == minWidth){
    			//tmp is not zeroed: index and depth stay unset, deleteNode only reads them on leaves
    			tmp = (Node*)malloc(sizeof(Node));
    			tmp->right = node_1->next;				//node_1->next is the current tail; each package is put in front of the previous one
    			tmp->left = node_1;
    			tmp->width = 2*minWidth;
    			tmp->weight = node_1->weight+node_1->next->weight;
    			tmp->next = node_2;
    			tmp->prev = NULL;
    
    			node_2->prev = tmp;
    			node_2 = tmp;
    			*tail = node_1->prev;	(*tail)->next = NULL;		//cut both packaged nodes off the main list
    		}else{
    			break;
    		}
    	}
    	node_P_head = node_2;
    
    	if(*tail != head && (*tail)->width == minWidth){	//an unpaired minWidth node is left at the tail: release it (presumably the heaviest of that width)
    		*tail = (*tail)->prev;
    		deleteNode((*tail)->next,Flag,Symbols,TRUE);
    		(*tail)->next = NULL;
    	}
    
    	//find the range of 2*minWidth
    	node_1 = *tail;
    	for(;node_1 != head && node_1->width == 2*minWidth;node_1 = node_1->prev){
    	}
    	node_head = node_1;		//last node whose width is not 2*minWidth (or head); the run starts at node_head->next
    
    	//merge
    	node_1 = node_head->next;	node_2 = node_P_head;
    	for(;node_1 != NULL && node_2 != node_P_tail;){
    		if(node_1->weight >= node_2->weight){
    			//the main-list node is at least as heavy: keep it in front and move on
    			node_1 = node_1->next;
    		}else{		//link package node_2 into the main list directly in front of node_1
    			node_1->prev->next = node_2;
    			node_2->prev = node_1->prev;
    			node_1->prev = node_2;
    
    			node_2 = node_2->next;			//advance to the next package before the old link is overwritten
    			node_2->prev->next = node_1;
    
    			node_2->prev = NULL;
    		}
    	}
    	if(node_1 == NULL){		//main list exhausted: append the remaining packages and move *tail to the last of them
    		(*tail)->next = node_2;
    		node_2->prev = *tail;
    		*tail = node_P_tail->prev;
    		(*tail)->next = NULL;
    		free(node_P_tail);	node_P_tail = NULL;
    	}else{
    		//every package was inserted in front of node_1, so *tail is unchanged
    		free(node_P_tail);	node_P_tail = NULL;
    	}
    }
    
    /*
     * Drive the selection of items whose widths add up to X.
     *
     * While X is positive, the smallest dyadic term of X is compared with the
     * width of the tail node: an equal-width tail is taken (its leaves are marked
     * 1 in Flag and the node is freed), a narrower tail triggers Package_Merge,
     * and a wider tail is an error.
     *
     * head:  list head, never taken itself.
     * tail:  last node of the list.
     * X:     total width still to be selected.
     * Flag, Symbols: passed through to deleteNode and Package_Merge.
     *
     * Returns 0 when X reaches 0, -1 when the list is empty while X is still
     * positive, -2 when the tail is wider than the smallest term of X.
     */
    int LengthLimitedHuffmanCode(Node *head,Node *tail,int X,unsigned char * Flag,int Symbols)
    {
    	for(;;){
    		int wanted,tailWidth;
    		Node *picked;
    
    		if(X <= 0)
    			break;
    
    		wanted = GetSmallestTerm(X);
    		if(head->next == NULL)		//nothing left to choose from
    			return -1;
    
    		tailWidth = tail->width;	//Just for Huffman Code,else r = GetMinWidth(head);
    		if(tailWidth > wanted)
    			return -2;
    
    		if(tailWidth < wanted){
    			Package_Merge(head,&tail,tailWidth,Flag,Symbols);
    			continue;
    		}
    
    		//widths match: unlink the tail node and take it
    		tail = tail->prev;
    		picked = tail->next;
    		deleteNode(picked,Flag,Symbols,FALSE);
    		tail->next = NULL;
    		X -= wanted;
    	}
    
    	return 0;
    }
    /*
     * Print the code of one table row as a string of 0/1 digits, most
     * significant bit first.  Nothing is printed when the length is 0.
     */
    void PrintHuffCode(HuffTable Huffcode)
    {
    	int bit = (int)Huffcode.len;
    
    	while(bit-- > 0){
    		printf("%d",(Huffcode.code>>bit) & 0x01);
    	}
    }
    /*
     * Turn the selection flags into code lengths and assign canonical codes.
     *
     * HuffCode:  output table with Symbols rows; index, len and code are written.
     * Flag:      L rows of Symbols bytes; the length of symbol i is the sum of
     *            Flag[j*Symbols+i] over the rows j.
     * L:         number of rows in Flag, 0..MaxHuffLen.
     * Symbols:   number of symbols.
     * SortIndex: SortIndex[i] is copied into HuffCode[i].index.
     *
     * The length is now computed from Flag alone instead of being added onto the
     * value already stored in HuffCode[i].len, so the result no longer depends on
     * the caller having zeroed the table.  A value of L outside 0..MaxHuffLen
     * would run past the fixed-size tables below and is rejected by returning
     * without touching HuffCode.
     */
    void GenerateHuffmanCode(HuffTable *HuffCode,unsigned char *Flag,int L,int Symbols,int *SortIndex)
    {
    	unsigned int codes[MaxHuffLen+2] = {0};	//codes[len]: next code value of that length
    	unsigned int rank[MaxHuffLen+1] = {0};	//rank: the number of symbols in every length
    	int i,j;
    
    	if(L < 0 || L > MaxHuffLen)
    		return;
    
    	//code length of every symbol and the histogram of the lengths
    	for(i=0;i<Symbols;i++){
    		unsigned int len = 0;
    		for(j=0;j<L;j++){
    			len += Flag[j*Symbols+i];
    		}
    		HuffCode[i].len = len;
    		if(len != 0)
    			rank[len]++;
    		HuffCode[i].index = SortIndex[i];
    	}
    
    	//first code of every length; rank is cleared so it can be reused as a
    	//running counter in the last pass
    	for(i=0;i<=L;i++){
    		codes[i+1] = (codes[i]+rank[i])<<1;
    		rank[i] = 0;
    	}
    
    	//symbols of equal length receive consecutive codes in table order
    	for(i=0;i<Symbols;i++){
    		HuffCode[i].code = codes[HuffCode[i].len] + rank[HuffCode[i].len]++;
    	}
    }
    /*
     * Average code length: the sum of len*weight over all symbols divided by
     * WeightSum.
     *
     * HuffCode:  table whose len fields are read.
     * weight:    weight of every symbol, in the same order as HuffCode.
     * Symbols:   number of entries in both arrays.
     * WeightSum: divisor, expected to be the sum of all weights.
     */
    float BitsPerSymbol(HuffTable *HuffCode,int *weight,int Symbols,int WeightSum)
    {
    	float totalBits = 0.0;
    	int k = 0;
    
    	while(k < Symbols){
    		float bits = (float)HuffCode[k].len;
    		totalBits += bits*weight[k];
    		k++;
    	}
    	return totalBits/WeightSum;
    }
    
    /*
     * Sort Freq into descending order in place and apply the same exchanges to
     * SortIndex, so SortIndex[i] keeps naming the entry that ended up at
     * position i.
     *
     * Freq:      Symbols values to sort.
     * SortIndex: Symbols companion values that are permuted alongside.
     * Symbols:   number of entries in both arrays.
     */
    void FreqSort(int *Freq,int *SortIndex,int Symbols)
    {
    	int slot = 0;
    
    	while(slot < Symbols){
    		int probe = slot+1;
    
    		while(probe < Symbols){
    			//a strictly larger value further right is exchanged into the slot
    			if(Freq[probe] > Freq[slot]){
    				int heldFreq = Freq[slot];
    				int heldIndex = SortIndex[slot];
    
    				Freq[slot] = Freq[probe];
    				Freq[probe] = heldFreq;
    
    				SortIndex[slot] = SortIndex[probe];
    				SortIndex[probe] = heldIndex;
    			}
    			probe++;
    		}
    		slot++;
    	}
    }
    
    int GenLenLimitedOptHuffCode(int *Freq,int Symbols)
    {
    	int i,j;
    	unsigned char *Flag = NULL;	//record the state of the node
    	unsigned int rank[MaxHuffLen];
    	Node *node = NULL,*head = NULL,*tail = NULL,*tmp = NULL;	//head not store data,just a head,tail store data
    	int Ret = 0;
    	HuffTable HuffCode[MaxSymbols];
    	float bitspersymbols = 0.0;
    	int WeightSum = 0;
    	int SortIndex[MaxSymbols];
    
    	if(Symbols > (1<<MaxHuffLen)){
    		printf("Symbols > (1<<MaxHuffLen)
    ");
    		return -1;
    	}
    
    	for(i=0;i<MaxSymbols;i++){
    		SortIndex[i] = i;
    	}
    	FreqSort(Freq,SortIndex,Symbols);		//sort
    
    	for(i=0;i<Symbols;i++){	
    		WeightSum += Freq[i];
    	}
    
    	head = (Node*)malloc(sizeof(Node));
    	memset(head,0,sizeof(Node));
    	Flag = (unsigned char*)malloc(MaxHuffLen*Symbols*sizeof(unsigned char));
    	memset(Flag,1,MaxHuffLen*Symbols*sizeof(unsigned char));
    
    	memset(HuffCode,0,sizeof(HuffCode));
    	node = head;
    
    	for(i=0;i<MaxHuffLen;i++){
    		for(j=0;j<Symbols;j++){
    			tmp = (Node*)malloc(sizeof(Node));
    			tmp->prev = node;							tmp->next = NULL;
    			tmp->left = NULL;							tmp->right = NULL;
    			tmp->width = 1<<(MaxHuffLen-i-1);
    			tmp->weight = Freq[j];
    			tmp->index = j;								tmp->depth = i;
    			node->next = tmp;
    			node = tmp;
    		}
    	}
    	tail = node;	//tail
    	Ret = LengthLimitedHuffmanCode(head,tail,(Symbols-1)<<MaxHuffLen,Flag,Symbols);
    
    	GenerateHuffmanCode(HuffCode,Flag,MaxHuffLen,Symbols,SortIndex);
    	
    	//print HuffCode
    	for(i=0;i<Symbols;i++){
    		printf("%03d weight:%04d Code:",HuffCode[i].index,Freq[i]);
    		PrintHuffCode(HuffCode[i]);
    		printf("	CodeLen:%02d",HuffCode[i].len);
    		printf("
    ");
    	}
    	bitspersymbols = BitsPerSymbol(HuffCode,Freq,Symbols,WeightSum);
    	printf("average code length:%f bits/symbol.
    ",bitspersymbols);
    
    	free(head);	head = NULL;
    	free(Flag);	Flag = NULL;
    
    	return Ret;
    }
    #include <time.h>
    /*
     * Demo driver: encodes the five example symbols with the weights
     * 10,6,2,1,1.  The exit status is 0 on success and 1 when
     * GenLenLimitedOptHuffCode reports an error; the result used to be ignored.
     */
    int main(void)
    {
    	//alternative data set: {1,25,3,4,9,6,4,6,26,15,234,4578} with 12 symbols
    	int Freq[MaxSymbols] = {10,6,2,1,1};	//weight is not zero.
    	int status = GenLenLimitedOptHuffCode(Freq,5);
    
    	return status == 0 ? 0 : 1;
    }

    运行上述程序的输出结果如下所示:




  • 相关阅读:
    码农的半衰期只有15年?
    微软面试100题2010年版全部答案集锦(转自July)
    大量url,如何去重
    后缀树求最长子字符串
    转 STL hash_map & map
    有n 个长为m+1 的字符串,求前后m个字符匹配所能形成的最长字符串链:利用弗洛伊德算法求最长路径
    获取本机地址信息,遇到小问题...有待解决
    HDOJ 1006
    归并排序
    插入排序的简单实现
  • 原文地址:https://www.cnblogs.com/yutingliuyl/p/6944421.html
Copyright © 2011-2022 走看看