zoukankan      html  css  js  c++  java
  • 将文字拆成拼音得到首字母(返回多音字)

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    
    namespace PinYin
    {
        /// <summary>
        /// Converts Chinese text into its pinyin initial letters. Characters with
        /// several readings (polyphones) contribute every one of their initials, so a
        /// single input can expand into several candidate initial strings.
        /// </summary>
        class Pinyin
        {
            /// <summary>
            /// Polyphone table: comma separated "codePoint:INITIALS" entries, where the
            /// key is the decimal UTF-16 code unit and the value lists one initial per
            /// reading. The original author's note says it covers 375 polyphonic
            /// characters (not re-counted here). A few entries are repeated verbatim;
            /// the first occurrence wins when the table is parsed.
            /// </summary>
            private const string MultiPinyin = "19969:DZ,19975:WM,19988:QJ,20048:YL,20056:SC,20060:NM,20094:QG,20127:QJ,20167:QC,20193:YG,20250:KH,20256:ZC,20282:SC,20285:QJG,20291:TD,20314:YD,20340:NE,20375:TD,20389:YJ,20391:CZ,20415:PB,20446:YS,20447:SQ,20504:TC,20608:KG,20854:QJ,20857:ZC,20911:PF,20504:TC,20608:KG,20854:QJ,20857:ZC,20911:PF,20985:AW,21032:PB,21048:XQ,21049:SC,21089:YS,21119:JC,21242:SB,21273:SC,21305:YP,21306:QO,21330:ZC,21333:SDC,21345:QK,21378:CA,21397:SC,21414:XS,21442:SC,21477:JG,21480:TD,21484:ZS,21494:YX,21505:YX,21512:HG,21523:XH,21537:PB,21542:PF,21549:KH,21571:E,21574:DA,21588:TD,21589:O,21618:ZC,21621:KHA,21632:ZJ,21654:KG,21679:LKG,21683:KH,21710:A,21719:YH,21734:WOE,21769:A,21780:WN,21804:XH,21834:A,21899:ZD,21903:RN,21908:WO,21939:ZC,21956:SA,21964:YA,21970:TD,22003:A,22031:JG,22040:XS,22060:ZC,22066:ZC,22079:MH,22129:XJ,22179:XA,22237:NJ,22244:TD,22280:JQ,22300:YH,22313:XW,22331:YQ,22343:YJ,22351:PH,22395:DC,22412:TD,22484:PB,22500:PB,22534:ZD,22549:DH,22561:PB,22612:TD,22771:KQ,22831:HB,22841:JG,22855:QJ,22865:XQ,23013:ML,23081:WM,23487:SX,23558:QJ,23561:YW,23586:YW,23614:YW,23615:SN,23631:PB,23646:ZS,23663:ZT,23673:YG,23762:TD,23769:ZS,23780:QJ,23884:QK,24055:XH,24113:DC,24162:ZC,24191:GA,24273:QJ,24324:NL,24377:TD,24378:QJ,24439:PF,24554:ZS,24683:TD,24694:WE,24733:LK,24925:TN,25094:ZG,25100:XQ,25103:XH,25153:PB,25170:PB,25179:KG,25203:PB,25240:ZS,25282:FB,25303:NA,25324:KG,25341:ZY,25373:WZ,25375:XJ,25384:A,25457:A,25528:SD,25530:SC,25552:TD,25774:ZC,25874:ZC,26044:YW,26080:WM,26292:PB,26333:PB,26355:ZY,26366:CZ,26397:ZC,26399:QJ,26415:ZS,26451:SB,26526:ZC,26552:JG,26561:TD,26588:JG,26597:CZ,26629:ZS,26638:YL,26646:XQ,26653:KG,26657:XJ,26727:HG,26894:ZC,26937:ZS,26946:ZC,26999:KJ,27099:KJ,27449:YQ,27481:XS,27542:ZS,27663:ZS,27748:TS,27784:SC,27788:ZD,27795:TD,27812:O,27850:PB,27852:MB,27895:SL,27898:PL,27973:QJ,27981:KH,27986:HX,27994:XJ,28044:YC,28065:WG,28177:SM,28267:QJ,28291:KH,28337:ZQ,28463:TL,28548:DC,28601:TD,28689:PB,28805:JG,28820:QG,28846:PB,28952:TD,28975:ZC,29100:A,29325:QJ,29575:SL,29602:FB,30010:TD,30044:CX,30058:PF,30091:YSP,30111:YN,30229:XJ,30427:SC,30465:SX,30631:YQ,30655:QJ,30684:QJG,30707:SD,30729:XH,30796:LG,30917:PB,31074:NM,31085:JZ,31109:SC,31181:ZC,31192:MLB,31293:JQ,31400:YX,31584:YJ,31896:ZN,31909:ZY,31995:XJ,32321:PF,32327:ZY,32418:HG,32420:XQ,32421:HG,32438:LG,32473:GJ,32488:TD,32521:QJ,32527:PB,32562:ZSQ,32564:JZ,32735:ZD,32793:PB,33071:PF,33098:XL,33100:YA,33152:PB,33261:CX,33324:BP,33333:TD,33406:YA,33426:WM,33432:PB,33445:JG,33486:ZN,33493:TS,33507:QJ,33540:QJ,33544:ZC,33564:XQ,33617:YT,33632:QJ,33636:XH,33637:YX,33694:WG,33705:PF,33728:YW,33882:SR,34067:WM,34074:YW,34121:QJ,34255:ZC,34259:XL,34425:JH,34430:XH,34485:KH,34503:YS,34532:HG,34552:XS,34558:YE,34593:ZL,34660:YQ,34892:XH,34928:SC,34999:QJ,35048:PB,35059:SC,35098:ZC,35203:TQ,35265:JX,35299:JX,35782:SZ,35828:YS,35830:E,35843:TD,35895:YG,35977:MH,36158:JG,36228:QJ,36426:XQ,36466:DC,36710:JC,36711:ZYG,36767:PB,36866:SK,36951:YW,37034:YX,37063:XH,37218:ZC,37325:ZC,38063:PB,38079:TD,38085:QY,38107:DC,38116:TD,38123:YD,38224:HG,38241:XTC,38271:ZC,38415:YE,38426:KH,38461:YD,38463:AE,38466:PB,38477:XJ,38518:YT,38551:WK,38585:ZC,38704:XS,38739:LJ,38761:GJ,38808:SQ,39048:JG,39049:XJ,39052:HG,39076:CZ,39271:XT,39534:TD,39552:TD,39584:PB,39647:SB,39730:LG,39748:TPB,40109:ZQ,40479:ND,40516:HG,40536:HG,40583:QJ,40765:YQ,40784:QJ,40840:YK,40863:QJG,";

            /// <summary>First and last code units of the CJK range this class handles.</summary>
            private const int FirstHanZi = 19968;
            private const int LastHanZi = 40869;

            /// <summary>
            /// Initial letters in GB2312 level-1 order. There is no I, U or V because no
            /// pinyin syllable starts with them.
            /// </summary>
            private const string Letters = "ABCDEFGHJKLMNOPQRSTWXYZ";

            /// <summary>
            /// For each entry of <see cref="Letters"/>, the first GB2312 code
            /// (lead byte * 256 + trail byte) whose character starts with that letter.
            /// A letter's range runs up to the next letter's start code.
            /// </summary>
            private static readonly int[] LetterStartCodes =
            {
                45217, 45253, 45761, 46318, 46826, 47010, 47297, 47614,
                48119, 49062, 49324, 49896, 50371, 50614, 50622, 50906,
                51387, 51446, 52218, 52698, 52980, 53689, 54481
            };

            /// <summary>Last GB2312 code of the pinyin-sorted level-1 area (end of "Z").</summary>
            private const int LastLevel1Code = 55289;

            /// <summary>Parsed form of <see cref="MultiPinyin"/>, built once.</summary>
            private static readonly Dictionary<int, string> MultiPinyinTable = BuildMultiPinyinTable();

            /// <summary>Encoding used to obtain GB2312 byte pairs, resolved once.</summary>
            private static readonly Encoding Gb2312Encoding = ResolveGb2312Encoding();

            /// <summary>
            /// Returns every candidate string of pinyin initials for the given text.
            /// </summary>
            /// <param name="CnStr">Text to convert. Whitespace is removed first; characters
            /// outside U+4E00..U+9FA5 (letters, digits, punctuation) are skipped.</param>
            /// <returns>
            /// One string per combination of readings, in table order. If no character
            /// yields an initial the array holds a single empty string. The number of
            /// combinations multiplies with every polyphonic character, so very long
            /// inputs can produce very large results.
            /// </returns>
            /// <exception cref="ArgumentNullException"><paramref name="CnStr"/> is null.</exception>
            public static string[] GetSpellCode(string CnStr)
            {
                if (CnStr == null)
                {
                    throw new ArgumentNullException("CnStr");
                }

                // Verbatim string: "\s" is not a valid C# escape in a regular literal.
                string compact = Regex.Replace(CnStr, @"\s", "");

                string joined = ResolvePinyinString(compact);

                return joined.Split(',', ';');
            }

            /// <summary>
            /// Builds the comma separated list of all initial-letter combinations for a
            /// string, taking the cross product of each character's possible initials.
            /// </summary>
            /// <param name="HanZiStr">Text without whitespace.</param>
            /// <returns>Combinations joined by ','; empty when nothing was convertible.</returns>
            private static string ResolvePinyinString(string HanZiStr)
            {
                List<string> combinations = new List<string>();

                foreach (char hanZi in HanZiStr)
                {
                    string initials = GetCharSpellCode(hanZi);
                    if (initials.Length == 0)
                    {
                        // Not a supported Chinese character: contributes nothing.
                        continue;
                    }

                    if (combinations.Count == 0)
                    {
                        // First convertible character seeds one combination per reading.
                        foreach (char initial in initials)
                        {
                            combinations.Add(initial.ToString());
                        }
                        continue;
                    }

                    // Extend every existing prefix with every reading of this character,
                    // prefix-major so the output order matches the table order.
                    List<string> extended = new List<string>();
                    foreach (string prefix in combinations)
                    {
                        foreach (char initial in initials)
                        {
                            extended.Add(prefix + initial);
                        }
                    }
                    combinations = extended;
                }

                return string.Join(",", combinations.ToArray());
            }

            /// <summary>
            /// Returns the pinyin initial(s) of a single character.
            /// </summary>
            /// <param name="HanZi">Character to look up.</param>
            /// <returns>
            /// An empty string when the character is outside U+4E00..U+9FA5; all initials
            /// listed in the polyphone table when present there; otherwise the single
            /// initial derived from the character's GB2312 code ("?" if unknown).
            /// </returns>
            private static string GetCharSpellCode(char HanZi)
            {
                int codeUnit = HanZi;
                if (codeUnit < FirstHanZi || codeUnit > LastHanZi)
                {
                    return "";
                }

                string initials;
                if (MultiPinyinTable.TryGetValue(codeUnit, out initials))
                {
                    return initials;
                }

                return GetSingleCharSpellCode(HanZi.ToString());
            }

            /// <summary>
            /// Returns the pinyin initial of a non-polyphonic character, based on the
            /// position of its GB2312 code in the pinyin-sorted level-1 area.
            /// </summary>
            /// <param name="CnChar">A single character.</param>
            /// <returns>
            /// The upper-cased input when it encodes to one byte (for example an ASCII
            /// letter), an upper-case initial for GB2312 level-1 characters, and "?"
            /// for anything else.
            /// </returns>
            private static string GetSingleCharSpellCode(string CnChar)
            {
                byte[] bytes = Gb2312Encoding.GetBytes(CnChar);

                if (bytes.Length == 1)
                {
                    return CnChar.ToUpperInvariant();
                }

                if (bytes.Length != 2)
                {
                    // Not a double-byte GB2312 character (e.g. the fallback encoding is UTF-8).
                    return "?";
                }

                return GetLetterForGb2312Code(bytes[0] * 256 + bytes[1]);
            }

            /// <summary>
            /// Maps a GB2312 code (lead byte * 256 + trail byte) to its pinyin initial.
            /// </summary>
            /// <param name="code">Two-byte GB2312 code as an integer.</param>
            /// <returns>An upper-case letter, or "?" when the code is not in level 1.</returns>
            internal static string GetLetterForGb2312Code(int code)
            {
                if (code < LetterStartCodes[0] || code > LastLevel1Code)
                {
                    return "?";
                }

                // GB2312 trail bytes start at 0xA1. Lower trail bytes are GBK extension
                // characters, which are not sorted by pinyin, so no letter can be derived.
                if ((code & 0xFF) < 0xA1)
                {
                    return "?";
                }

                for (int index = LetterStartCodes.Length - 1; index >= 0; index--)
                {
                    if (code >= LetterStartCodes[index])
                    {
                        return Letters[index].ToString();
                    }
                }

                return "?";
            }

            /// <summary>
            /// Parses <see cref="MultiPinyin"/> into a lookup keyed by code unit.
            /// </summary>
            private static Dictionary<int, string> BuildMultiPinyinTable()
            {
                Dictionary<int, string> table = new Dictionary<int, string>();
                string[] entries = MultiPinyin.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

                foreach (string entry in entries)
                {
                    int colon = entry.IndexOf(':');
                    int key = int.Parse(entry.Substring(0, colon), System.Globalization.CultureInfo.InvariantCulture);

                    // Keep the first occurrence, matching a left-to-right search of the table.
                    if (!table.ContainsKey(key))
                    {
                        table.Add(key, entry.Substring(colon + 1));
                    }
                }

                return table;
            }

            /// <summary>
            /// Resolves the GB2312 encoding explicitly instead of relying on the system
            /// default code page. On runtimes where legacy code pages are not available
            /// until the host registers a code-page encoding provider, this falls back
            /// to <see cref="Encoding.Default"/>; non-polyphonic characters then yield "?".
            /// </summary>
            private static Encoding ResolveGb2312Encoding()
            {
                try
                {
                    return Encoding.GetEncoding("GB2312");
                }
                catch (ArgumentException)
                {
                    return Encoding.Default;
                }
                catch (NotSupportedException)
                {
                    return Encoding.Default;
                }
            }
        }
    }
  • 相关阅读:
    WDM驱动加载方式理解
    应用程序与设备对象交换数据的三种方法
    IRP完成例程返回值理解
    关于IoCallDriver使用的疑惑
    Ring0打开其他设备对象三种方式整理
    DPC和ISR的理解
    Windows驱动开发技术详解HelloWDM例子win7下无法安装
    wdk中ramdisk代码解读
    内核编程键盘过滤几种方法思路整理
    IOAPIC重定位中断处理函数思路整理
  • 原文地址:https://www.cnblogs.com/jijm123/p/12663590.html
Copyright © 2011-2022 走看看