zoukankan      html  css  js  c++  java
  • 哈夫曼(huffman )的编码

    #pragma once
    #include "stdafx.h"
    #include "BinTree.h"
    #include "BitsOper.h"
    namespace Huffman
    {
        using namespace std;
        std::map<std::string, int> mp;
        using line = std::vector<string>;
        using vctLine = std::vector<line>;
        using char_int = std::pair<char, int>;
        std::map<char, string> ms;
        std::map<char, string> codeMap;
    
        template<typename T>
        struct HuffTree
        {
            enum ChildState {
                LeftChild = 0,
                RightChild = 1,
                Root = 2    //特点 父节点为空
            };
            T data{ T() };    // 0 或者1
            HuffTree* Left{ nullptr };
            HuffTree* Right{ nullptr };
            HuffTree* Pairent{ nullptr };
    
            ChildState localState;
            void InsertLeft(HuffTree* ht)
            {
                Left = ht;
            }
            void InsertRight(HuffTree* ht)
            {
                Right = ht;
            }
            void Free()
            {
                //if (Left != nullptr) FreeHuffChild(Left);
                //if (Right != nullptr) FreeHuffChild(Right);
                FreeHuffTree(Left);
                FreeHuffTree(Right);
            }
            void FreeHuffTree(HuffTree* tree)
            {
                if (tree->Left)
                {
                    FreeHuffTree(tree->Left);
                    //delete tree->Left;
                    tree->Left = nullptr;
                }
                if (tree->Right)
                {
                    FreeHuffTree(tree->Right);
                    //delete tree->Right;
                    tree->Right = nullptr;
                }
                delete tree;
            }
    
            std::vector<T> ToList()
            {
                std::vector<T> vct;
                PushNodeToVct(vct, this);
                return vct;
            }
            //Breadth-First Search
    
    
            int ndepth = 0;
            string ss = "";
            void PushNodeToVct(std::vector<T>& vct, HuffTree* hu)
            {
                vct.push_back(hu->data);
                cout << "(" << hu->data.first << "," << hu->data.second << ")";
                //cout << (int)hu->data.second;
                ss += std::to_string(hu->data.second);
                if (hu->Left != nullptr)
                {
                    ss += "->";
                    cout << "->";
                    PushNodeToVct(vct, hu->Left);
                    ndepth++;
                }
                else {
                    ndepth--;
                    //ss.substr(0, ss.size() - 3);
                    cout << endl;
                    //return;
                }
                if (hu->Right != nullptr)
                {
                    //cout << hu->data.first << endl;
                    ss += "->";
                    cout << "->";
                    PushNodeToVct(vct, hu->Right);
                    ndepth++;
                }
                else {
                    //ss.substr(0, ss.size());
                    cout << "
    ";
                    //ss += "
    ";
                    //cout << ss;
                }
                ndepth++;
            }
        };
        using HInfoT = HuffTree<char_int>;
        HInfoT* huffTree = nullptr;    //构造结果
    
        int BinaryStringToInt(string s)
        {
            uint64_t val = 0;
            for (int i = 0; i < s.size(); i++)
            {
                int asd = s[i] == '1' ? 1 : 0;
                val += (asd << s.size() - 1 - i);
            }
            return val;
        }
        std::string ToBinaryString(uint8_t byte)
        {
            string s = "";
            //
            for (int i = 7; i >= 0; i--)
            {
                uint8_t b = 1 << i;
                uint8_t bb = byte & b;
                if (bb == 0)
                {
                    s += "0";
                }
                else {
                    s += "1";
                }
            }
            return s;
        }
        std::string DecodeHuffman(std::vector<unsigned char> buff)
        {
            string bits = "";
            for (int i = 0; i < buff.size(); i++)
            {
                bits += ToBinaryString(buff[i]);
            }
            std::string rs = "";
            HInfoT* pCur = huffTree;
            for (int i = 0; i < bits.length(); i++)
            {
                if (bits[i] == '0')
                {
                    if (pCur->Left != nullptr) {
                        pCur = pCur->Left;
                    }
                    else {
                        rs.push_back(pCur->data.first);
                        pCur = huffTree->Left;
                    }
                }
                if (bits[i] == '1')
                {
                    if (pCur->Right != nullptr)
                    {
                        pCur = pCur->Right;
                    }
                    else {
                        rs.push_back(pCur->data.first);
                        pCur = huffTree->Right;
                    }
                }
            }
            return rs;
        }
    
        void GenerateHuffCode(line& sr, HInfoT* hit, string& str)
        {
            if (hit->Left != nullptr)
            {
                str += "1";
                ms[hit->Left->data.first] = str;
                GenerateHuffCode(sr, hit->Left, str);
            }
            else {
                string c = str;
                c.erase(c.end() - 1);
                sr.push_back(c);
                return;
            }
    
            if (hit->Right != nullptr)
            {
                str += "0";
                ms[hit->Right->data.first] = str;
                GenerateHuffCode(sr, hit->Right, str);
            }
            //左子节点一定会有 right node 不一定有
            else {
                //由于先遍历左子节点
                string c = str;
                c.erase(c.end() - 1);
                sr.push_back(c);
            }
            //FindTree(sr, tree);
        }
        void GetCode(HInfoT* node, string& s)
        {
            if (node->localState == HInfoT::ChildState::LeftChild)
            {
                s += "0";
                if (node->Pairent != NULL)
                {
                    GetCode(node->Pairent, s);
                }
            }
            if (node->localState == HInfoT::ChildState::RightChild)
            {
                s += "1";
                if (node->Pairent != NULL)
                {
                    GetCode(node->Pairent, s);
                }
            }
            if (node->localState == HInfoT::ChildState::Root)
            {
                //s += "r";
            }
        }
        
        vector<unsigned char>  EncodeHuffman(std::string str)
        {
            using HInfoT = HuffTree<char_int>;
            std::map<char, int> mp;
            for (int i = 0; i < str.length(); i++)
            {
                if (mp.find(str[i]) == mp.end())
                {
                    mp.insert(std::pair<char, int>(str[i], 1));
                }
                else {
                    mp[str[i]] ++;
                }
            }
            std::deque<char_int> dqLst;
            for (auto it : mp)
            {
                dqLst.push_back(it);
                cout << it.first << endl;
            }
    
            std::function<bool(char_int&, char_int&)> asc =
                [](char_int& lhs, char_int& rhs) {
                return lhs.second < rhs.second;
            };
            std::sort(dqLst.begin(), dqLst.end(), asc);
    
            //当前概率 + 当前所指向的元素
            HInfoT* root = nullptr;// 左节点为 0 右节点1
            HInfoT* hinf = nullptr;
            std::deque<HInfoT*> dqNodes;
            std::function<bool(HInfoT*, HInfoT*)> ascNode =
                [](HInfoT* l, HInfoT* r) {
                return l->data.second < r->data.second;
            };
    
            for (int i = 0; i < dqLst.size(); i++)
            {
                HInfoT* h = new HInfoT;
                //char
                h->data = dqLst[i];
                h->data.first = dqLst[i].first;
                //概率
                h->data.second = dqLst[i].second;
                dqNodes.push_back(h);
                cout << "(" << h->data.first << "," << h->data.second << ")" << endl;
            }
            int pos;
            char ch;
            while (dqNodes.size() > 2)
            {
                hinf = new HInfoT();
                root = hinf;
    
                hinf->data.second = dqNodes[0]->data.second + dqNodes[1]->data.second;
                hinf->data.first = dqNodes[0]->data.first;
                int nLeft = dqNodes[0]->data.second < dqNodes[1]->data.second ? 0 : 1;
                int nRight = nLeft == 0 ? 1 : 0;
                hinf->Left = dqNodes[nLeft];
                dqNodes[nLeft]->Pairent = hinf;
                dqNodes[nLeft]->localState = HInfoT::LeftChild;
    
                dqNodes[nRight]->Pairent = hinf;
                dqNodes[nRight]->localState = HInfoT::RightChild;
                ch = hinf->Left->data.first;
    
                hinf->Right = dqNodes[nRight];
                //dqNodes.erase(dqNodes.begin());
                //dqNodes.erase(dqNodes.begin() + 1);
                dqNodes.pop_front();
                dqNodes.pop_front();
                dqNodes.push_back(hinf);
                std::sort(dqNodes.begin(), dqNodes.end(), ascNode);
    
            }
            int nLeft = dqNodes[0]->data.second < dqNodes[1]->data.second ? 0 : 1;
            int nRight = nLeft == 0 ? 1 : 0;
            root = new HInfoT();
            root->data.second = hinf->data.second + hinf->data.second;
            root->data.first = hinf->data.first;
            root->Left = dqNodes[nLeft];
            root->Right = dqNodes[nRight];
            root->localState = HInfoT::Root;
            dqNodes[nLeft]->Pairent = root;
            dqNodes[nLeft]->localState = HInfoT::ChildState::LeftChild;
            dqNodes[nRight]->Pairent = root;
            dqNodes[nRight]->localState = HInfoT::ChildState::RightChild;
            std::vector<HInfoT*> vn;
            std::function<void(std::vector<HInfoT*>*)> bfsFunc = [root](std::vector<HInfoT*>* vhint) {
    
                deque<HInfoT*> tmpStack;
                HInfoT* r = root;
                tmpStack.push_back(root);
                vhint->push_back(root);
                while (tmpStack.size() > 0)
                {
                    r = tmpStack.front();
                    tmpStack.pop_front();
                    if (r->Left != nullptr)
                    {
                        tmpStack.push_back(r->Left);
                        vhint->push_back(r->Left);
                    }
                    if (r->Right != nullptr)
                    {
                        tmpStack.push_back(r->Right);
                        vhint->push_back(r->Right);
                    }
                }
    
            };
            bfsFunc(&vn);
            string codeNodeStr;
            for (auto v : vn)
            {
                codeNodeStr.clear();
                //只记录底层节点的码,也就是真实的码
                if (v->Left == nullptr && v->Right == nullptr)
                {
                    GetCode(v, codeNodeStr);
                    std::reverse(codeNodeStr.begin(), codeNodeStr.end());
                    codeMap[v->data.first] = codeNodeStr;
                }
            }
            cout << endl;
            for (auto vn : codeMap)
            {
                cout << "[" << vn.first << ":" << vn.second << "]";
            }
            cout << "codeMap" << endl;
            //获取编码表
            auto vct = root->ToList();
            for (auto v : vct)
            {
                std::cout << v.first << ":" << v.second << endl;
            }
            //生成Huffman表
            //小的为0 大的为1
            const int leftVal = 0;
            const int rightVal = 1;
            line vl;
            vl.push_back(string());
            string s = "";
            //GenerateHuffCode(vl, root, s);
            //for (int i = 0; i < vl.size(); i++)
            //{
            //    //cout << vl[i] << endl;
            //    string stt = "";
            //    for (int j = 0; j < vl[i].size(); j++)
            //    {
            //        if (j % 3 == 0)
            //        {
            //            stt += ",";
            //        }
            //        stt += vl[i][j];
            //    }
            //    cout << stt << endl;
            //}
            //cout << "---------------------" << endl;/* << pr.second */
    
    
            //std::string encoderStr;
            //char buff[2048];
            std::string sres;
            deque<char> sdq;    //用来存储字符串
            deque<string> dq;    //编码替换  需要换成每8个Bit为1字节
            deque<string> stringRes;    //最终结果 8 bit 为一个string
            vector<unsigned char> ucRes;    //字节表
            //std::copy(sdq.begin(), sdq.end(), str.begin());
            for (int i = 0; i < str.size(); i++)
            {
                sdq.push_back(str[i]);
            }
            unsigned char uch;
            while (sdq.size() > 0)
            {
                //存储010字符串
                dq.push_back(codeMap[sdq.front()]);
                sdq.pop_front();
                //buff = ms[str[i]];
            }
            int i = 0;
            string sChar;    //到8位就下一个
            s = "";
            for (int i = 0; i < dq.size(); i++)
            {
                s += dq[i];
            }
            int offset = 0;
            for (int i = 0; i < s.size(); i++)
            {
                //这里可以优化
                if (i % 8 == 0 && i != 0)
                {
                    string tmp;
                    tmp.resize(8);
                    std::copy(s.begin() + i - 8, s.begin() + i, tmp.begin());
                    stringRes.push_back(tmp);
                    offset += 8;
                }
            }
            if (offset < s.size())
            {
                string tmp;
                tmp.resize(s.size() - offset);
                std::copy(s.begin() + offset, s.end(), tmp.begin());
                while (tmp.size() < 8) {
                    tmp.push_back('0');
                }
                stringRes.push_back(tmp);
            }
    
            for (int i = 0; i < stringRes.size(); i++)
            {
                ucRes.push_back(BinaryStringToInt(stringRes[i]));
                //cout << ucRes[i] << endl;
            }
            int m = ucRes.size();
            cout << "uc res" << m << endl;
            huffTree = root;
            return ucRes;
        }
        
    }

     构造过程:     1.先统计各个字符的出现概率

             2.找出最小的两个作为一个节点,该节点的概率为两个子节点的概率之和,继续排序,

             3.重复上一步骤,知道剩下的节点为1

             4.约定左子节点和右子节点 为0 或者1

    其中还有一个类似的编码类似,算术编码,算术编码的过程是要把概率用整数表示。构造完成之后然后用对应的节点替换,节点表示(从根节点到节点)

    解码过程,使用遍历,遇到0然后指向左子节点(按照约定),遇到1(指向右子节点),然后指向下一节点。如果为空,则把该子节点所表示的字符打印。

    然后从根节点开始。

      中间huffman树是可以按位操作的,每个节点表示0或者1 占据大小为1bit,所以在替换环节,用bit oper 或者bit vector来处理。读取的时候也一样

  • 相关阅读:
    洛谷P1199三国游戏
    Cracking the Coding Interview 6.2
    Cracking the Coding Interview 5.2
    Cracking the Coding Interview 5.7
    洗牌算法
    字符串排列组合问题
    指针作为形参
    KMP算法代码
    搜索二叉树
    面试题集锦
  • 原文地址:https://www.cnblogs.com/yang131/p/13857953.html
Copyright © 2011-2022 走看看