zoukankan      html  css  js  c++  java
  • 中英混串转拼音 源码

    /* This program only works on GB2312-encoded text. */
    
    
    /*
     * Boundary table: the code values at which the pinyin reading of the
     * Chinese characters changes. Each value is a two-byte character code
     * packed as (high byte << 8) + low byte, the same way the conversion
     * functions in this file build char_zh. The values are in ascending
     * order, and get_pin() maps a code to the entry of str_pin that has the
     * same index as the largest boundary not greater than that code.
     */
    static const unsigned short code_pin[] = {
        0xb0a1,0xb0a3,0xb0b0,0xb0b9,0xb0bc,0xb0c5,0xb0d7,0xb0df,0xb0ee,0xb0fa,0xb1ad,0xb1bc,0xb1c0,0xb1c6,
        0xb1de,0xb1ea,0xb1ee,0xb1f2,0xb1f8,0xb2a3,0xb2b8,0xb2c1,0xb2c2,0xb2cd,0xb2d4,0xb2d9,0xb2de,0xb2e3,
        0xb2e5,0xb2f0,0xb2f3,0xb2fd,0xb3ac,0xb3b5,0xb3bb,0xb3c5,0xb3d4,0xb3e4,0xb3e9,0xb3f5,0xb4a7,0xb4a8,
        0xb4af,0xb4b5,0xb4ba,0xb4c1,0xb4c3,0xb4cf,0xb4d5,0xb4d6,0xb4da,0xb4dd,0xb4e5,0xb4e8,0xb4ee,0xb4f4,
        0xb5a2,0xb5b1,0xb5b6,0xb5c2,0xb5c5,0xb5cc,0xb5df,0xb5ef,0xb5f8,0xb6a1,0xb6aa,0xb6ab,0xb6b5,0xb6bc,
        0xb6cb,0xb6d1,0xb6d5,0xb6de,0xb6ea,0xb6f7,0xb6f8,0xb7a2,0xb7aa,0xb7bb,0xb7c6,0xb7d2,0xb7e1,0xb7f0,
        0xb7f1,0xb7f2,0xb8c1,0xb8c3,0xb8c9,0xb8d4,0xb8dd,0xb8e7,0xb8f8,0xb8f9,0xb8fb,0xb9a4,0xb9b3,0xb9bc,
        0xb9ce,0xb9d4,0xb9d7,0xb9e2,0xb9e5,0xb9f5,0xb9f8,0xb9fe,0xbaa1,0xbaa8,0xbabb,0xbabe,0xbac7,0xbad9,
        0xbadb,0xbadf,0xbae4,0xbaed,0xbaf4,0xbba8,0xbbb1,0xbbb6,0xbbc4,0xbbd2,0xbbe7,0xbbed,0xbbf7,0xbcce,
        0xbcdf,0xbda9,0xbdb6,0xbdd2,0xbded,0xbea3,0xbebc,0xbebe,0xbecf,0xbee8,0xbeef,0xbef9,0xbfa6,0xbfaa,
        0xbfaf,0xbfb5,0xbfbc,0xbfc0,0xbfcf,0xbfd3,0xbfd5,0xbfd9,0xbfdd,0xbfe4,0xbfe9,0xbfed,0xbfef,0xbff7,
        0xc0a4,0xc0a8,0xc0ac,0xc0b3,0xc0b6,0xc0c5,0xc0cc,0xc0d5,0xc0d7,0xc0e2,0xc0e5,0xc1a9,0xc1aa,0xc1b8,
        0xc1c3,0xc1d0,0xc1d5,0xc1e1,0xc1ef,0xc1fa,0xc2a5,0xc2ab,0xc2bf,0xc2cd,0xc2d3,0xc2d5,0xc2dc,0xc2e8,
        0xc2f1,0xc2f7,0xc3a2,0xc3a8,0xc3b4,0xc3b5,0xc3c5,0xc3c8,0xc3d0,0xc3de,0xc3e7,0xc3ef,0xc3f1,0xc3f7,
        0xc3fd,0xc3fe,0xc4b1,0xc4b4,0xc4c3,0xc4ca,0xc4cf,0xc4d2,0xc4d3,0xc4d8,0xc4d9,0xc4db,0xc4dc,0xc4dd,
        0xc4e8,0xc4ef,0xc4f1,0xc4f3,0xc4fa,0xc4fb,0xc5a3,0xc5a7,0xc5ab,0xc5ae,0xc5af,0xc5b0,0xc5b2,0xc5b6,
        0xc5b7,0xc5be,0xc5c4,0xc5ca,0xc5d2,0xc5d7,0xc5de,0xc5e7,0xc5e9,0xc5f7,0xc6aa,0xc6ae,0xc6b2,0xc6b4,
        0xc6b9,0xc6c2,0xc6cb,0xc6da,0xc6fe,0xc7a3,0xc7b9,0xc7c1,0xc7d0,0xc7d5,0xc7e0,0xc7ed,0xc7ef,0xc7f7,
        0xc8a6,0xc8b1,0xc8b9,0xc8bb,0xc8bf,0xc8c4,0xc8c7,0xc8c9,0xc8d3,0xc8d5,0xc8d6,0xc8e0,0xc8e3,0xc8ed,
        0xc8ef,0xc8f2,0xc8f4,0xc8f6,0xc8f9,0xc8fd,0xc9a3,0xc9a6,0xc9aa,0xc9ad,0xc9ae,0xc9af,0xc9b8,0xc9ba,
        0xc9ca,0xc9d2,0xc9dd,0xc9e9,0xc9f9,0xcaa6,0xcad5,0xcadf,0xcba2,0xcba4,0xcba8,0xcbaa,0xcbad,0xcbb1,
        0xcbb5,0xcbb9,0xcbc9,0xcbd1,0xcbd4,0xcbe1,0xcbe4,0xcbef,0xcbf2,0xcbfa,0xcca5,0xccae,0xccc0,0xcccd,
        0xccd8,0xccd9,0xccdd,0xccec,0xccf4,0xccf9,0xccfc,0xcda8,0xcdb5,0xcdb9,0xcdc4,0xcdc6,0xcdcc,0xcdcf,
        0xcdda,0xcde1,0xcde3,0xcdf4,0xcdfe,0xcec1,0xcecb,0xcece,0xced7,0xcef4,0xcfb9,0xcfc6,0xcfe0,0xcff4,
        0xd0a8,0xd0bd,0xd0c7,0xd0d6,0xd0dd,0xd0e6,0xd0f9,0xd1a5,0xd1ab,0xd1b9,0xd1c9,0xd1ea,0xd1fb,0xd2ac,
        0xd2bb,0xd2f0,0xd3a2,0xd3b4,0xd3b5,0xd3c4,0xd3d9,0xd4a7,0xd4bb,0xd4c5,0xd4d1,0xd4d4,0xd4db,0xd4df,
        0xd4e2,0xd4f0,0xd4f4,0xd4f5,0xd4f6,0xd4fa,0xd5aa,0xd5b0,0xd5c1,0xd5d0,0xd5da,0xd5e4,0xd5f4,0xd6a5,
        0xd6d0,0xd6db,0xd6e9,0xd7a5,0xd7a7,0xd7a8,0xd7ae,0xd7b5,0xd7bb,0xd7bd,0xd7c8,0xd7d7,0xd7de,0xd7e2,
        0xd7ea,0xd7ec,0xd7f0,0xd7f2 };
    
    /*
     * Pinyin readings, indexed in parallel with code_pin: get_pin() returns
     * str_pin[i] for the boundary code_pin[i] it selects. The syllables are
     * plain ASCII without tone marks; note the "lv"/"nv" entries, which
     * presumably stand for the u-umlaut syllables.
     */
    static const char *str_pin[] = {
        "a","ai","an","ang","ao","ba","bai","ban","bang","bao","bei","ben","beng","bi","bian","biao",
        "bie","bin","bing","bo","bu","ca","cai","can","cang","cao","ce","ceng","cha","chai","chan",
        "chang","chao","che","chen","cheng","chi","chong","chou","chu","chuai","chuan","chuang","chui",
        "chun","chuo","ci","cong","cou","cu","cuan","cui","cun","cuo","da","dai","dan","dang","dao",
        "de","deng","di","dian","diao","die","ding","diu","dong","dou","du","duan","dui","dun","duo",
        "e","en","er","fa","fan","fang","fei","fen","feng","fo","fou","fu","ga","gai","gan","gang",
        "gao","ge","gei","gen","geng","gong","gou","gu","gua","guai","guan","guang","gui","gun",
        "guo","ha","hai","han","hang","hao","he","hei","hen","heng","hong","hou","hu","hua","huai",
        "huan","huang","hui","hun","huo","ji","jia","jian","jiang","jiao","jie","jin","jing",
        "jiong","jiu","ju","juan","jue","jun","ka","kai","kan","kang","kao","ke","ken","keng",
        "kong","kou","ku","kua","kuai","kuan","kuang","kui","kun","kuo","la","lai","lan","lang",
        "lao","le","lei","leng","li","lia","lian","liang","liao","lie","lin","ling","liu","long","lou",
        "lu","lv","luan","lue","lun","luo","ma","mai","man","mang","mao","me","mei","men","meng",
        "mi","mian","miao","mie","min","ming","miu","mo","mou","mu","na","nai","nan","nang","nao","ne",
        "nei","nen","neng","ni","nian","niang","niao","nie","nin","ning","niu","nong","nu","nv","nuan",
        "nue","nuo","o","ou","pa","pai","pan","pang","pao","pei","pen","peng","pi","pian","piao","pie",
        "pin","ping","po","pu","qi","qia","qian","qiang","qiao","qie","qin","qing","qiong","qiu","qu",
        "quan","que","qun","ran","rang","rao","re","ren","reng","ri","rong","rou","ru","ruan","rui",
        "run","ruo","sa","sai","san","sang","sao","se","sen","seng","sha","shai","shan","shang","shao",
        "she","shen","sheng","shi","shou","shu","shua","shuai","shuan","shuang","shui","shun","shuo",
        "si","song","sou","su","suan","sui","sun","suo","ta","tai","tan","tang","tao","te","teng",
        "ti","tian","tiao","tie","ting","tong","tou","tu","tuan","tui","tun","tuo","wa","wai","wan",
        "wang","wei","wen","weng","wo","wu","xi","xia","xian","xiang","xiao","xie","xin","xing",
        "xiong","xiu","xu","xuan","xue","xun","ya","yan","yang","yao","ye","yi","yin","ying","yo",
        "yong","you","yu","yuan","yue","yun","za","zai","zan","zang","zao","ze","zei","zen","zeng",
        "zha","zhai","zhan","zhang","zhao","zhe","zhen","zheng","zhi","zhong","zhou","zhu","zhua",
        "zhuai","zhuan","zhuang","zhui","zhun","zhuo","zi","zong","zou","zu","zuan","zui","zun","zuo"};
    
    /*
     * Headers are pulled in before the first use of size_t; the constant
     * below previously referred to size_t before anything had declared it.
     */
    #include <cstddef>
    #include <iostream>
    #include <string>
    
    using std::size_t;
    using std::string;
    using std::cout;
    using std::endl;
    
    /*
     * Number of entries in code_pin. The divisor is taken from the array's
     * own element so the count stays right if the element type ever changes.
     */
    static const size_t SIZE_ARRAY = sizeof(code_pin) / sizeof(code_pin[0]);
    
    /*
     * Return the pinyin of a single Chinese character.
     *
     * char_zh  Two-byte GB2312 code packed as (high byte << 8) + low byte.
     *
     * Returns a pointer to a NUL-terminated entry of str_pin, or an empty
     * string when char_zh is not a GB2312 level-1 hanzi. Only level-1 hanzi
     * (0xb0a1..0xd7f9, low byte 0xa1..0xfe) are laid out in pinyin order, so
     * the boundary table cannot give a meaningful answer for symbols,
     * level-2 hanzi or malformed codes; those used to be reported as "a" or
     * "zuo". The returned pointer is never null, so callers may append it
     * to a std::string unconditionally.
     */
    const char* get_pin(unsigned short char_zh)
    {
        const unsigned int LAST_LEVEL1_CODE = 0xd7f9;
        const unsigned int low_byte = char_zh & 0xffu;
    
        if (char_zh < code_pin[0] || char_zh > LAST_LEVEL1_CODE
            || low_byte < 0xa1 || low_byte > 0xfe)
        {
            return "";
        }
    
        /* Binary search for the first boundary that is greater than char_zh. */
        size_t first = 0, last = SIZE_ARRAY;
        while (first < last)
        {
            const size_t middle = first + (last - first) / 2;
            if (code_pin[middle] <= char_zh) first = middle + 1;
            else last = middle;
        }
    
        /* char_zh >= code_pin[0] was checked above, hence first >= 1 here. */
        return str_pin[first - 1];
    }
    
    /*
     * Convert a mixed Chinese/ASCII string to pinyin.
     *
     * input  GB2312-encoded text. Bytes below 0x80 are copied unchanged;
     *        every other byte is taken as the high byte of a two-byte
     *        character whose pinyin (from get_pin) replaces the pair.
     *
     * Returns the converted string, with no separators inserted. A high
     * byte that is the very last byte of the input has no partner; it is
     * copied through unchanged instead of being paired with the string
     * terminator and looked up as a bogus character.
     */
    string str_to_pin(string const &input)
    {
        string result;
        const size_t INPUT_LEN = input.length();
        for (size_t i = 0; i < INPUT_LEN; ++i)
        {
            const unsigned char high_byte = input[i];
            if (high_byte < 0x80)
            {
                result.append(1, input[i]);
                continue;
            }
            if (i + 1 >= INPUT_LEN)
            {
                /* Truncated two-byte character at the end of the input. */
                result.append(1, input[i]);
                break;
            }
            const unsigned char low_byte = input[++i];
            const unsigned short char_zh =
                static_cast<unsigned short>((high_byte << 8) + low_byte);
            result.append(get_pin(char_zh));
        }
        return result;
    }
    
    /*
     * Convert a mixed Chinese/ASCII string to pinyin, separating the pinyin
     * of each Chinese character from its neighbours with a single space.
     *
     * input  GB2312-encoded text. Bytes below 0x80 are copied unchanged;
     *        every other byte is taken as the high byte of a two-byte
     *        character.
     *
     * A space is written before every Chinese character that is not at the
     * start of the input, and before an ASCII byte that directly follows a
     * Chinese character. The "previous byte belongs to a Chinese character"
     * test compares the byte as unsigned char, so it no longer depends on
     * whether plain char is signed on the target platform. A high byte that
     * is the last byte of the input is copied through unchanged.
     */
    string str_to_pin_space(string const &input)
    {
        string result;
        const size_t INPUT_LEN = input.length();
        for (size_t i = 0; i < INPUT_LEN; ++i)
        {
            const unsigned char high_byte = input[i];
            if (high_byte < 0x80)
            {
                const bool after_hanzi =
                    i > 0 && static_cast<unsigned char>(input[i - 1]) >= 0x80;
                if (after_hanzi) result.append(1, ' ');
                result.append(1, input[i]);
                continue;
            }
    
            if (i > 0) result.append(1, ' ');
            if (i + 1 >= INPUT_LEN)
            {
                /* Truncated two-byte character at the end of the input. */
                result.append(1, input[i]);
                break;
            }
            const unsigned char low_byte = input[++i];
            const unsigned short char_zh =
                static_cast<unsigned short>((high_byte << 8) + low_byte);
            result.append(get_pin(char_zh));
        }
        return result;
    }
    
    /*
     * Convert a string mixing Chinese characters, English letters and digits
     * to pinyin, placing one space between each pair of adjacent runs.
     *
     * input  GB2312-encoded text that contains only those three kinds of
     *        characters; the caller is responsible for guaranteeing this.
     *        Bytes up to '9' are handled as digits, bytes from 'A' to 'z' as
     *        letters, and every other byte as the high byte of a two-byte
     *        Chinese character.
     *
     * Returns the converted string. It always starts with a single space,
     * because the result is seeded with one. A space is written before every
     * Chinese character that is not at the start of the input, before a
     * digit that follows a non-digit, and before a letter that follows a
     * digit or a Chinese character. The previous byte is examined as
     * unsigned char so the outcome does not depend on the signedness of
     * plain char. A high byte that is the last byte of the input is copied
     * through unchanged rather than being paired with the string terminator.
     */
    string str_to_pin_space_(string const &input)
    {
        string result(1, ' ');/* seeded with one space */
        const size_t INPUT_LEN = input.length();
        for (size_t i = 0; i < INPUT_LEN; ++i)
        {
            const unsigned char high_byte = input[i];
            /* Only meaningful when i > 0; every use below checks that. */
            const unsigned char prev_byte =
                i > 0 ? static_cast<unsigned char>(input[i - 1]) : 0;
    
            if (high_byte <= '9')/* digit */
            {
                if (i > 0 && prev_byte > '9') result.append(1, ' ');
                result.append(1, input[i]);
            }
            else if (high_byte >= 'A' && high_byte <= 'z')/* English letter */
            {
                if (i > 0 && (prev_byte >= 0x80 || prev_byte <= '9'))
                {
                    result.append(1, ' ');
                }
                result.append(1, input[i]);
            }
            else/* Chinese character */
            {
                if (i > 0) result.append(1, ' ');
                if (i + 1 >= INPUT_LEN)
                {
                    /* Truncated two-byte character at the end of the input. */
                    result.append(1, input[i]);
                    break;
                }
                const unsigned char low_byte = input[++i];
                const unsigned short char_zh =
                    static_cast<unsigned short>((high_byte << 8) + low_byte);
                result.append(get_pin(char_zh));
            }
        }
        return result;
    }
    
    /* Demo driver: converts one sample string and prints the result. */
    int main()
    {
        const string sample("zhonghua人民dfd啊a152左边35gdaf共和国");
        const string converted = str_to_pin_space_(sample);
        cout << converted << endl;
        return 0;
    }

  • 相关阅读:
    在日本被禁止的コンプガチャ設計
    Starling常见问题解决办法
    Flixel引擎学习笔记
    SQLSERVER中修复状态为Suspect的数据库
    T4 (Text Template Transformation Toolkit)实现简单实体代码生成
    创建Linking Server in SQL SERVER 2008
    Linq to Sql 与Linq to Entities 生成的SQL Script与分页实现
    Linq to Entity 的T4 模板生成代码
    在VisualStudio2008 SP1中调试.net framework 源代码
    使用HttpModules实现Asp.net离线应用程序
  • 原文地址:https://www.cnblogs.com/mtcnn/p/9410065.html
Copyright © 2011-2022 走看看