zoukankan      html  css  js  c++  java
  • C# 拆分字符串中的英文和数字(包括国外所有文字)

     先定义一个接口及其实现类:

     /// <summary>
        /// Text analysis: splits a string into tokens and counts how often each occurs.
        /// </summary>
        interface IStringAna
        {
            /// <summary>
            /// Tokenizes <paramref name="text"/> and counts the occurrences of every token.
            /// </summary>
            /// <param name="text">The text to analyse.</param>
            /// <returns>
            /// A dictionary whose key is a token found in the text (for example a single
            /// Chinese character or an English word) and whose value is the number of
            /// times that token occurs.
            /// </returns>
            Dictionary<string, int> Ana(string text);
        }

        /// <summary>
        /// Default <see cref="IStringAna"/> implementation.
        /// Tokenization rules:
        /// a space (U+0020) separates tokens and is never counted;
        /// a maximal run of characters in the range U+0000..U+00FF (other than space) forms one token;
        /// every other character forms a token of its own.
        /// </summary>
        class StringAna : IStringAna
        {
            /// <summary>The only character treated as a token separator.</summary>
            private const char Separator = ' ';

            /// <summary>Highest code unit that is grouped into multi-character runs.</summary>
            private const char MaxRunChar = '\u00FF';

            /// <summary>Token reported for an unpaired surrogate code unit.</summary>
            private const string ReplacementToken = "\uFFFD";

            /// <summary>
            /// Tokenizes <paramref name="text"/> and counts the occurrences of every token.
            /// </summary>
            /// <param name="text">The text to analyse; must not be null.</param>
            /// <returns>A dictionary mapping each token to its number of occurrences; empty for an empty text.</returns>
            /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
            public Dictionary<string, int> Ana(string text)
            {
                if (text == null)
                {
                    throw new System.ArgumentNullException(nameof(text));
                }

                var dict = new Dictionary<string, int>();
                var i = 0;
                while (i < text.Length)
                {
                    var current = text[i];
                    if (current == Separator)
                    {
                        i++;
                        continue;
                    }

                    string word;
                    if (current <= MaxRunChar)
                    {
                        // Extend the run up to the next separator or the first character above U+00FF.
                        var end = i + 1;
                        while (end < text.Length && text[end] != Separator && text[end] <= MaxRunChar)
                        {
                            end++;
                        }
                        word = text.Substring(i, end - i);
                        i = end;
                    }
                    else if (char.IsHighSurrogate(current) && i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
                    {
                        // Keep both halves of a surrogate pair together so that characters
                        // outside the BMP (emoji, rare CJK) are counted as a single token.
                        word = text.Substring(i, 2);
                        i += 2;
                    }
                    else if (char.IsSurrogate(current))
                    {
                        // An unpaired surrogate is not valid text; report it as U+FFFD
                        // instead of putting an ill-formed string into the result.
                        word = ReplacementToken;
                        i++;
                    }
                    else
                    {
                        word = current.ToString();
                        i++;
                    }

                    Put(dict, word);
                }
                return dict;
            }

            /// <summary>
            /// Increments the counter stored for <paramref name="word"/>, starting at 1 for a new word.
            /// </summary>
            /// <param name="dict">The counter table to update.</param>
            /// <param name="word">The token that was just seen.</param>
            void Put(Dictionary<string, int> dict, string word)
            {
                int count;
                dict[word] = dict.TryGetValue(word, out count) ? count + 1 : 1;
            }
        }

    调用示例:

         // Build the sample input and echo it so the counts below can be checked by eye.
         var text = GetTestText();
         Console.WriteLine(text);

         IStringAna ana = new StringAna();

         // Time only the analysis call; stop the watch before reporting so that
         // console output is not included in the measurement.
         var sp = Stopwatch.StartNew();
         var rlt = ana.Ana(text);
         sp.Stop();
         Console.WriteLine($"Elapsed:{sp.ElapsedTicks}");

         if (rlt == null)
         {
             // Defensive: guards against an IStringAna implementation that returns null.
             Console.WriteLine("error");
         }
         else
         {
             // Print every token together with its number of occurrences.
             foreach (var item in rlt)
             {
                 Console.WriteLine($"{item.Key}:{item.Value}");
             }
         }
  • 相关阅读:
    上一章下一章
    三、Spring-Bean高级装配
    二、Spring装配Bean
    IDEA--IDEA debug断点调试技巧
    springboot基础
    一、初识Spring
    十、理解JavaBean
    # 记一次Tomcat升级——幽灵猫
    CentOS7离线安装ansible的方法
    【转发】
  • 原文地址:https://www.cnblogs.com/zwyAndDong/p/7389533.html
Copyright © 2011-2022 走看看