zoukankan      html  css  js  c++  java
  • 数据结构 最优字符串编码 (哈夫曼编码)

    Description

    基于任给一串大写英文字母序列(例如MNOPPPOPMMPOPOPPOPNP),编程实现求解一套二进制编码,使得上述正文的编码最短。

    Input

    有多组输入数据,每组一串字符串,每个字符串长度不超过1000且只包含大写英文字母。

    Output

    对每组数据,第一行输出组号(格式为 Case #k:),接下来每行输出一个字母的编码,满足字典序小的字母的编码字典序也尽量小,最后一行输出编码后串的长度;若长度小于50,再在同一行输出编码后的字符串,格式见样例。

    Sample Input

    ABC

    Sample Output

    Case #1:
    A: 0
    B: 10
    C: 11
    5 01011

    HINT

    考察知识点:哈夫曼树, 时间复杂度O(nlogn),空间复杂度O(n),好多人都过不了,数据已经减少了,大家可以试试。


    Append Code

    析:先建哈夫曼树,再遍历整棵树,求出每个叶子结点的哈夫曼编码。建树使用优先队列:每次取出频率最小的两个结点合并,频率相同时优先取出字典序较大的结点。遍历时,向左走就在编码末尾加0,向右走就加1,到达叶子结点时把当前编码存储起来。

    代码如下:

    #pragma comment(linker, "/STACK:1024000000,1024000000")
    #include <cstdio>
    #include <string>
    #include <cstdlib>
    #include <cmath>
    #include <iostream>
    #include <cstring>
    #include <set>
    #include <queue>
    #include <algorithm>
    #include <vector>
    #include <map>
    #include <cctype>
    #include <cmath>
    #include <stack>
    //#include <tr1/unordered_map>
    #define freopenr freopen("in.txt", "r", stdin)
    #define freopenw freopen("out.txt", "w", stdout)
    using namespace std;
    //using namespace std :: tr1;
     
    // Shorthand types and general-purpose constants. Apart from maxn (which sizes
    // the input buffer), none of these are referenced in the visible code.
    typedef long long LL;
    typedef pair<int, int> P;
    // Large sentinel values for int, double and LL respectively.
    const int INF = 0x3f3f3f3f;
    const double inf = 0x3f3f3f3f3f3f;
    const LL LNF = 0x3f3f3f3f3f3f;
    const double PI = acos(-1.0);
    // Tolerance constant (presumably for floating-point comparisons).
    const double eps = 1e-8;
    // Capacity of the input buffer: 1000 characters plus a little slack.
    const int maxn = 1000 + 5;
    const LL mod = 10000000000007;
    const int N = 1e6 + 5;
    // Row/column offsets of the 8 neighbouring cells: the first four entries are
    // the orthogonal neighbours, the last four the diagonal ones.
    const int dr[] = {-1, 0, 1, 0, 1, 1, -1, -1};
    const int dc[] = {0, 1, 0, -1, 1, -1, 1, -1};
    // Row/column offsets of the 8 knight moves, i.e. (+-2, +-1) and (+-1, +-2).
    const int hr[]= {-2, -2, -1, -1, 1, 1, 2, 2};
    const int hc[]= {-1, 1, -2, 2, -2, 2, -1, 1};
    // 4-bit binary text for the values 0..15, indexed by value.
    const char *Hex[] = {"0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111", "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"};
    // Greatest common divisor by the Euclidean algorithm; yields a when b is 0.
    inline LL gcd(LL a, LL b){
        while(b != 0){
            const LL rem = a % b;
            a = b;
            b = rem;
        }
        return a;
    }
    // n is set to the length of the current input string in main(); both n and m
    // are the bounds read by is_in(). m is never assigned in the visible code.
    int n, m;
    // Days per month, indexed 1..12 (index 0 is a placeholder): mon has a 28-day
    // February, monn a 29-day February.
    const int mon[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
    const int monn[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
    // Returns the smaller of two ints (b when they are equal).
    inline int Min(int a, int b){
        if(a < b) return a;
        return b;
    }
    // Returns the larger of two ints (b when they are equal).
    inline int Max(int a, int b){
        if(a > b) return a;
        return b;
    }
    // Returns the smaller of two LL values (b when they are equal).
    inline LL Min(LL a, LL b){
        if(a < b) return a;
        return b;
    }
    // Returns the larger of two LL values (b when they are equal).
    inline LL Max(LL a, LL b){
        if(a > b) return a;
        return b;
    }
    // True when (r, c) lies inside the grid [0, n) x [0, m) given by the globals n and m.
    inline bool is_in(int r, int c){
        if(r < 0 || r >= n) return false;
        if(c < 0 || c >= m) return false;
        return true;
    }
    // One node of the Huffman tree, stored in a fixed pool and linked by raw pointers.
    struct Node{
        int id;        // index of this node in the pool
        int num;       // weight (occurrence count) of the subtree
        int ch;        // letter index used as the tie-break key
        Node *lchild;  // child reached with bit '0', null for a leaf
        Node *rchild;  // child reached with bit '1', null for a leaf

        // Ordering for std::priority_queue (a max-heap): a node compares "less"
        // when its weight is larger, so the lightest node ends up on top; among
        // equal weights the node with the larger ch is on top.
        bool operator < (const Node &other) const{
            if(num != other.num) return other.num < num;
            return ch < other.ch;
        }
    };
    // Node pool: solve() places the leaves first and appends each merged node
    // after them; main() passes a[cnt-1] to dfs() as the root.
    Node a[120];
    // Input buffer for the current test case.
    char s[maxn];
    // Occurrence count of each letter, indexed by letter - 'A'.
    int num[30];
    // Number of pool entries in use.
    int cnt;
    // Set by solve() when the input contains exactly one distinct letter.
    bool ok;
    // Code assigned to each letter, indexed by letter - 'A'; empty if the letter is absent.
    string str[30];
     
    void solve(){
        for(int i = 0; i < 26; ++i)  if(num[i]){
            a[cnt].ch = i;
            a[cnt].id = cnt;
            a[cnt++].num = num[i];
        }
        priority_queue<Node> pq;
        for(int i = 0; i < cnt; ++i)  pq.push(a[i]);
        if(cnt == 1) ok = true;
        while(pq.size() > 1){
            int id1 = pq.top().id, num1 = pq.top().num, ch1 = pq.top().ch; pq.pop();
            int id2 = pq.top().id, num2 = pq.top().num, ch2 = pq.top().ch; pq.pop();
            if(num1 == num2 && ch1 > ch2){
                a[cnt].ch = ch2;
                a[cnt].num = num1 + num2;
                a[cnt].id = cnt;
                a[cnt].lchild = &a[id2];
                a[cnt].rchild = &a[id1];
            }
            else{
                a[cnt].ch = ch1;
                a[cnt].num = num1 + num2;
                a[cnt].id = cnt;
                a[cnt].lchild = &a[id1];
                a[cnt].rchild = &a[id2];
            }
            pq.push(a[cnt]);
            ++cnt;
        }
    }
     
    /**
     * Walks the tree rooted at p and records the code of every leaf in str[].
     *
     * @param p  current node (must not be null)
     * @param s  bit string accumulated on the path from the root to p
     */
    void dfs(Node *p, string s){
        Node *const left = p->lchild;
        Node *const right = p->rchild;

        // A leaf has no children: its path is the code of its letter.
        if(!left && !right){
            str[p->ch] = s;
            return;
        }
        if(left)  dfs(left, s + "0");
        if(right) dfs(right, s + "1");
    }
     
    /**
     * Reads whitespace-separated strings until end of input. For each one it prints
     * "Case #k:", one "<letter>: <code>" line per letter present (in alphabetical
     * order), and finally the length of the encoded text, followed by the encoded
     * text itself when it is shorter than 50 bits.
     *
     * NOTE(review): characters outside 'A'..'Z' would index num[]/str[] out of
     * range; the problem statement promises uppercase letters only.
     */
    int main(){
        int kase = 0;
        // The field width keeps scanf inside s: maxn is 1005 bytes, i.e. at most
        // 1004 characters plus the terminator. Keep it in sync with maxn.
        while(scanf("%1004s", s) == 1){
            printf("Case #%d:\n", ++kase);

            // Clear the node pool and the per-letter codes left by the previous case.
            for(int i = 0; i < 120; ++i){
                a[i].id = a[i].num = 0;
                a[i].lchild = a[i].rchild = 0;
                if(i < 30) str[i].clear();
            }

            n = static_cast<int>(strlen(s));
            memset(num, 0, sizeof num);
            for(int i = 0; i < n; ++i){
                ++num[s[i]-'A'];
            }

            cnt = 0;
            ok = false;
            solve();

            // A single distinct letter produces no merge, so the tree has no edges;
            // give that letter the one-bit code "0" directly.
            if(ok)  str[s[0]-'A'] = "0";
            else  dfs(&a[cnt-1], "");

            for(int i = 0; i < 26; ++i)
                if(!str[i].empty())  printf("%c: %s\n", i+'A', str[i].c_str());

            string ans;
            for(int i = 0; i < n; ++i)
                ans += str[s[i]-'A'];

            // size() is a size_t; convert explicitly so it matches the %d specifier.
            printf("%d", static_cast<int>(ans.size()));
            if(ans.size() < 50)  printf(" %s", ans.c_str());
            printf("\n");
        }
        return 0;
    }
    
  • 相关阅读:
    输入url后的加载过程~
    编写一个方法,求字符串长度~~~
    闭包 什么是闭包 为什么用闭包~~
    作用域链的理解~~
    谈谈javascript的基本规范~~~~
    html中datalist 是什么??????
    elementui中el-input联想搜索框
    js中数组对象去重的方法
    vue视频截图第一帧demo
    styled-components的基本使用
  • 原文地址:https://www.cnblogs.com/dwtfukgv/p/5990690.html
Copyright © 2011-2022 走看看