zoukankan      html  css  js  c++  java
  • 判断重复字符存在:更有意义一点

       /// <summary>
       /// Sample routines for detecting duplicated characters in a string and for
       /// finding the most frequently used character(s).
       /// All routines except <see cref="IsStringWithNoDuplicatedChar_Implemenation3"/>
       /// assume every character has a code point in the range 0-255.
       /// </summary>
       class Program
       {
           /// <summary>Number of distinct character codes (0-255) the routines accept.</summary>
           private const int CharSetSize = 256;

           /// <summary>Number of flag bits stored in one <see cref="int"/>.</summary>
           private const int BitsPerInt = 32;

           /// <summary>
           /// Entry point: runs <see cref="FindMostFrequentlyUsedChars"/> on a sample
           /// string and then waits for a key press so the console window stays open.
           /// </summary>
           static void Main(string[] args)
           {
               Program p = new Program();
               List<char> result = p.FindMostFrequentlyUsedChars("hjkluuiuikjk");

               Console.ReadKey();
           }

           /// <summary>
           /// Rejects a null input string.
           /// </summary>
           /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
           private static void EnsureNotNull(string s)
           {
               if (s == null)
               {
                   throw new ArgumentNullException("s");
               }
           }

           /// <summary>
           /// Rejects a character whose code point is outside the supported 0-255 range.
           /// </summary>
           /// <exception cref="ArgumentException"><paramref name="c"/> is greater than 255.</exception>
           private static void EnsureInCharSet(char c)
           {
               if (c >= CharSetSize)
               {
                   throw new ArgumentException("string contains UNICODE char");
               }
           }

           /// <summary>
           /// Determines whether a string contains no duplicated character, using one
           /// boolean flag per possible character code.
           /// Time complexity is O(n) with n = s.Length; extra space is a fixed array of
           /// 256 flags. See <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>
           /// for a variant that uses less space.
           /// </summary>
           /// <param name="s">The string to inspect; every character must be in the range 0-255.</param>
           /// <returns><c>true</c> if every character occurs at most once; otherwise <c>false</c>.</returns>
           /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
           /// <exception cref="ArgumentException">A character above 255 is met before a duplicate is found.</exception>
           public bool IsStringWithNoDuplicatedChar_Implemenation1(string s)
           {
               EnsureNotNull(s);

               bool[] flags = new bool[CharSetSize];
               foreach (char c in s)
               {
                   // The flag table only covers codes 0-255.
                   EnsureInCharSet(c);

                   if (flags[c])
                   {
                       return false;
                   }
                   flags[c] = true;
               }
               return true;
           }

           /// <summary>
           /// Bitmap variant of <see cref="IsStringWithNoDuplicatedChar_Implemenation1"/>:
           /// one bit per character code, packed into eight 32-bit integers.
           /// </summary>
           /// <param name="s">The string to inspect; every character must be in the range 0-255.</param>
           /// <returns><c>true</c> if every character occurs at most once; otherwise <c>false</c>.</returns>
           /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
           /// <exception cref="ArgumentException">A character above 255 is met before a duplicate is found.</exception>
           public bool IsStringWithNoDuplicatedChar_Implemenation2(string s)
           {
               EnsureNotNull(s);

               // In C# "int" is always System.Int32, so 8 * 32 bits cover the 256 codes.
               int[] flags = new int[CharSetSize / BitsPerInt];
               foreach (char c in s)
               {
                   // Still required: the bitmap has no slot for codes above 255.
                   EnsureInCharSet(c);

                   int index = c / BitsPerInt;
                   int mask = 1 << (c % BitsPerInt);

                   // Compare against zero, not "> 0": for bit 31 the mask is int.MinValue,
                   // so the AND result is negative when that bit is already set.
                   if ((flags[index] & mask) != 0)
                   {
                       return false;
                   }
                   flags[index] |= mask;
               }
               return true;
           }

           /// <summary>
           /// Stronger assumption: every character is a lowercase letter 'a'-'z', so the
           /// 26 letters fit into the bits of a single 32-bit integer.
           /// </summary>
           /// <param name="s">The string to inspect; every character must be in 'a'-'z'.</param>
           /// <returns><c>true</c> if every letter occurs at most once; otherwise <c>false</c>.</returns>
           /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
           /// <exception cref="ArgumentException">A character outside 'a'-'z' is met before a duplicate is found.</exception>
           public bool IsStringWithNoDuplicatedChar_Implemenation3(string s)
           {
               EnsureNotNull(s);

               int flag = 0;
               foreach (char c in s)
               {
                   // Without this check an out-of-range character would produce a shift
                   // count outside 0-25 and alias onto the bit of some other letter.
                   if (c < 'a' || c > 'z')
                   {
                       throw new ArgumentException("string contains char out of a-z");
                   }

                   int mask = 1 << (c - 'a');
                   if ((flag & mask) != 0)
                   {
                       return false;
                   }
                   flag |= mask;
               }
               return true;
           }

           /// <summary>
           /// Adds a length shortcut in front of
           /// <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>: a string longer
           /// than the 256-code character set either repeats a character or contains
           /// illegal input, so it is reported as "has duplicates" without being scanned.
           /// </summary>
           /// <param name="s">The string to inspect.</param>
           /// <returns>
           /// <c>false</c> if <paramref name="s"/> is longer than 256 characters; otherwise
           /// the result of <see cref="IsStringWithNoDuplicatedChar_Implemenation2"/>.
           /// </returns>
           /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
           /// <exception cref="ArgumentException">
           /// <paramref name="s"/> has at most 256 characters and a character above 255 is
           /// met before a duplicate is found.
           /// </exception>
           public bool IsStringWithNoDuplicatedChar_Implemenation4(string s)
           {
               EnsureNotNull(s);

               if (s.Length > CharSetSize)
               {
                   return false;
               }

               return IsStringWithNoDuplicatedChar_Implemenation2(s);
           }

           /// <summary>
           /// Finds the character that occurs most often in a string. When several
           /// characters share the highest count, the one that reached that count first
           /// while scanning from the start is returned; use
           /// <see cref="FindMostFrequentlyUsedChars"/> to get all of them.
           /// </summary>
           /// <param name="s">The string to inspect; every character must be in the range 0-255.</param>
           /// <returns>The most frequent character, or <c>null</c> for an empty string.</returns>
           /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
           /// <exception cref="ArgumentException"><paramref name="s"/> contains a character above 255.</exception>
           public char? FindMostFrequentlyUsedChar(string s)
           {
               EnsureNotNull(s);

               int[] counters = new int[CharSetSize];
               int most_used_times = 0;
               char? most_used_char = null;
               foreach (char c in s)
               {
                   // The counter table only covers codes 0-255.
                   EnsureInCharSet(c);

                   counters[c]++;

                   // Strictly greater: a later character that merely ties does not
                   // replace the current leader.
                   if (counters[c] > most_used_times)
                   {
                       most_used_times = counters[c];
                       most_used_char = c;
                   }
               }
               return most_used_char;
           }

           /// <summary>
           /// Like <see cref="FindMostFrequentlyUsedChar"/>, but returns every character
           /// that shares the highest count. Still a single O(n) pass.
           /// </summary>
           /// <param name="s">The string to inspect; every character must be in the range 0-255.</param>
           /// <returns>
           /// The characters with the highest occurrence count, in the order in which
           /// they reached that count; an empty list for an empty string.
           /// </returns>
           /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
           /// <exception cref="ArgumentException"><paramref name="s"/> contains a character above 255.</exception>
           public List<char> FindMostFrequentlyUsedChars(string s)
           {
               EnsureNotNull(s);

               List<char> most_used_chars = new List<char>();
               int[] counters = new int[CharSetSize];
               int most_used_times = 0;
               foreach (char c in s)
               {
                   // The counter table only covers codes 0-255.
                   EnsureInCharSet(c);

                   counters[c]++;

                   if (counters[c] > most_used_times)
                   {
                       // New record: everything collected so far is out of date.
                       most_used_chars.Clear();
                       most_used_times = counters[c];
                       most_used_chars.Add(c);
                   }
                   else if (counters[c] == most_used_times)
                   {
                       // A character reaches a given count only once, so this cannot
                       // add the same character twice.
                       most_used_chars.Add(c);
                   }
               }
               return most_used_chars;
           }
       }
  • 相关阅读:
    贪吃蛇模块划分
    你今天听见《风声》了吗“ 学习无处不在继潜伏后《风声》观后
    贪吃蛇结束条件分析
    CSS fieldset制作表单,抛弃Table
    两个路由器串联
    .net 几种获取路径方法
    C# 对文件与文件夹的操作 删除、移动与复制
    mvc3在IIS中的部署(转)
    HTTP 错误 500.21 Internal Server Error
    遗留问题
  • 原文地址:https://www.cnblogs.com/dancewithautomation/p/3489646.html
Copyright © 2011-2022 走看看