zoukankan      html  css  js  c++  java
  • 移除重复字符的几个算法简单比较

       /// <summary>
       /// Compares several ways of removing duplicated characters from a string.
       /// Every variant keeps the first occurrence of each character, in input order.
       /// </summary>
       class Program
       {
           /// <summary>
           /// Demo entry point: loads a sample text file and runs the three
           /// general-purpose variants (1, 2 and 6) on it.
           /// </summary>
           static void Main(string[] args)
           {
               // Verbatim string: the backslash is a literal path separator here.
               string s = File.ReadAllText(@"e:\test.txt");
               Program p = new Program();
               string r = p.RemoveDuplicatedChar_1(s);
               r = p.RemoveDuplicatedChar_2(s);
               r = p.RemoveDuplicatedChar_6(s);
           }

           /// <summary>
           /// Variant 1: rely on the class library. HashSet (available since .NET 3.5)
           /// tracks which characters have been seen.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// <paramref name="s"/> unchanged when it is null, empty or a single character;
           /// otherwise the distinct characters in first-occurrence order.
           /// </returns>
           public string RemoveDuplicatedChar_1(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               // Enumerating a HashSet has no guaranteed order, so the set is used only
               // for membership; the output order is driven by the input itself.
               HashSet<char> seen = new HashSet<char>();
               char[] buffer = new char[s.Length];
               int count = 0;
               foreach (char c in s)
               {
                   // Add returns false when the character is already present.
                   if (seen.Add(c))
                   {
                       buffer[count] = c;
                       count++;
                   }
               }

               return new string(buffer, 0, count);
           }

           /// <summary>
           /// Variant 2: LINQ's Distinct does the same job in one call.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// <paramref name="s"/> unchanged when it is null, empty or a single character;
           /// otherwise the characters produced by Distinct.
           /// </returns>
           public string RemoveDuplicatedChar_2(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               return new string(s.Distinct().ToArray());
           }

           // Distinct is implemented with a hash table whose slots are chained like a
           // linked list. The decompiled implementation is quoted for reference; note
           // that the iterator yields an element the first time Add succeeds for it.
           /****
                   [__DynamicallyInvokable]
                   public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source)
                   {
                       if (source == null)
                       {
                           throw Error.ArgumentNull("source");
                       }
                       return DistinctIterator<TSource>(source, null);
                   }

                   private static IEnumerable<TSource> DistinctIterator<TSource>(IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
                   {
                       Set<TSource> iteratorVariable0 = new Set<TSource>(comparer);
                       foreach (TSource iteratorVariable1 in source)
                       {
                           if (iteratorVariable0.Add(iteratorVariable1))
                           {
                               yield return iteratorVariable1;
                           }
                       }
                   }

                   public bool Add(TElement value)
                   {
                       return !this.Find(value, true);
                   }

                   private bool Find(TElement value, bool add)
                   {
                       int hashCode = this.InternalGetHashCode(value);
                       for (int i = this.buckets[hashCode % this.buckets.Length] - 1; i >= 0; i = this.slots[i].next)
                       {
                           if ((this.slots[i].hashCode == hashCode) && this.comparer.Equals(this.slots[i].value, value))
                           {
                               return true;
                           }
                       }
                       if (add)
                       {
                           int freeList;
                           if (this.freeList >= 0)
                           {
                               freeList = this.freeList;
                               this.freeList = this.slots[freeList].next;
                           }
                           else
                           {
                               if (this.count == this.slots.Length)
                               {
                                   this.Resize();
                               }
                               freeList = this.count;
                               this.count++;
                           }
                           int index = hashCode % this.buckets.Length;
                           this.slots[freeList].hashCode = hashCode;
                           this.slots[freeList].value = value;
                           this.slots[freeList].next = this.buckets[index] - 1;
                           this.buckets[index] = freeList + 1;
                       }
                       return false;
                   }
            ****/

           /// <summary>
           /// Variant 3: interview-style assumption that every character is in 'a'-'z',
           /// so the 26 letters can be tracked as bits of a single 32-bit integer.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// <paramref name="s"/> unchanged when it is null, empty or a single character;
           /// otherwise the distinct characters in first-occurrence order.
           /// </returns>
           /// <exception cref="ArgumentException">A character lies outside 'a'-'z'.</exception>
           public string RemoveDuplicatedChar_3(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();
               Int32 flags = 0;
               int newIndex = 0;
               for (int i = 0; i < charArray.Length; i++)
               {
                   char c = charArray[i];
                   if (c < 'a' || c > 'z')
                   {
                       throw new ArgumentException("char should be in range(a-z)");
                   }

                   // The range check above bounds the offset to 0..25, so it always
                   // fits in the 32 available bits without any further reduction.
                   int mask = 1 << (c - 'a');
                   if ((flags & mask) == 0)
                   {
                       // Compact in place: newIndex never runs ahead of i.
                       charArray[newIndex] = c;
                       newIndex++;
                       flags |= mask;
                   }
               }

               return new string(charArray, 0, newIndex);
           }

           /// <summary>
           /// Variant 4: the 256 single-byte character codes (0-255) are tracked with
           /// eight 32-bit integers used as one 256-bit map.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// <paramref name="s"/> unchanged when it is null, empty or a single character;
           /// otherwise the distinct characters in first-occurrence order.
           /// </returns>
           /// <exception cref="ArgumentException">A character code is greater than 255.</exception>
           public string RemoveDuplicatedChar_4(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();
               Int32[] flags = new Int32[8];
               int newIndex = 0;

               for (int i = 0; i < charArray.Length; i++)
               {
                   char c = charArray[i];
                   if (c > 255)
                   {
                       throw new ArgumentException("char should be in ASCII");
                   }

                   // Word index selects one of the eight integers, mask selects the bit.
                   int index = c / 32;
                   int mask = 1 << (c % 32);
                   if ((flags[index] & mask) == 0)
                   {
                       charArray[newIndex] = c;
                       newIndex++;
                       flags[index] |= mask;
                   }
               }

               return new string(charArray, 0, newIndex);
           }

           /// <summary>
           /// Variant 5: same idea as variant 4, with one bool per character code
           /// (0-255) instead of packed bits.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// <paramref name="s"/> unchanged when it is null, empty or a single character;
           /// otherwise the distinct characters in first-occurrence order.
           /// </returns>
           /// <exception cref="ArgumentException">A character code is greater than 255.</exception>
           public string RemoveDuplicatedChar_5(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();
               bool[] flags = new bool[256];
               int newIndex = 0;

               for (int i = 0; i < charArray.Length; i++)
               {
                   char c = charArray[i];
                   if (c > 255)
                   {
                       throw new ArgumentException("char should be in ASCII");
                   }

                   if (!flags[c])
                   {
                       charArray[newIndex] = c;
                       newIndex++;
                       flags[c] = true;
                   }
               }

               return new string(charArray, 0, newIndex);
           }

           /// <summary>
           /// Variant 6: nested-loop (quadratic) implementation that needs no extra
           /// lookup buffer or library data structure and places no restriction on the
           /// character range. On large inputs it is expected to be much slower than
           /// variants 1 and 2; a run-time comparison follows the listing.
           /// </summary>
           /// <param name="s">Input text; may be null or empty.</param>
           /// <returns>
           /// <paramref name="s"/> unchanged when it is null, empty or a single character;
           /// otherwise the distinct characters in first-occurrence order.
           /// </returns>
           public string RemoveDuplicatedChar_6(string s)
           {
               if (string.IsNullOrEmpty(s) || s.Length < 2)
               {
                   return s;
               }

               char[] charArray = s.ToCharArray();

               // The first character is always kept, so the kept prefix starts at length 1.
               int newIndex = 1;

               for (int i = 1; i < charArray.Length; i++)
               {
                   // Scan the kept prefix [0, newIndex) for the current character.
                   int j = 0;
                   for (j = 0; j < newIndex; j++)
                   {
                       if (charArray[i] == charArray[j])
                       {
                           break;
                       }
                   }

                   // The scan ran to the end without a match: the character is new,
                   // so append it to the kept prefix.
                   if (j == newIndex)
                   {
                       charArray[newIndex] = charArray[i];
                       newIndex++;
                   }
               }

               return new string(charArray, 0, newIndex);
           }
       }

    在处理百万级别的文本时,1,2,6的运行时间比较:

    image

    /****
    对于纯ASCII的大字符串,给出了5个方法的时间比较如下图
    可以看出4,5在时间效率上超过了1和2
    所以在处理混合型字符串时,是否应该考虑综合应用这些算法呢?
    答案是明显的
    ****/

    image

  • 相关阅读:
    Go中的结构实现它的的写法注意事项
    Go语言中的struct的初始化。
    python中的number类型
    python的类型
    今天起,每天记录python等语言的编程心得和体会
    destoon 下apache伪静态排除目录规则
    如何进行数据库设计?
    Spring IOC知识点
    SpringBoot框架:集成Security完成认证鉴权
    CentOS 7 本地安装kubernetes
  • 原文地址:https://www.cnblogs.com/dancewithautomation/p/3493772.html
Copyright © 2011-2022 走看看