zoukankan      html  css  js  c++  java
  • 字符串中判断存在的几种模式和效率(string.contains、string.IndexOf、Regex.Match)

      通常情况下,我们判断一个字符串中是否存在某值常常会用string.Contains,其实判断一个字符串中存在某值的方法有很多种,最常用的就是前述所说的string.Contains,相对来说比较常用的还有string.IndexOf和Regex.Match。直接上代码,后面再说些什么吧,通常情况下功能的实现最重要,作者的话,只对有心者有效。


    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    
    namespace ExistsInString
    {
        using System.Diagnostics;

        /// <summary>
        /// Console demo that shows three ways of testing whether one string occurs
        /// inside another (String.Contains, String.IndexOf and Regex.Match) and then
        /// times each of them over a large number of calls.
        /// </summary>
        class Program
        {
            // Number of calls timed per measurement (ten million; the original
            // spelled this as (int)10e6).
            private const int LoopCount = 10000000;

            // How many times the complete set of six measurements is repeated.
            private const int RoundCount = 6;

            /// <summary>
            /// Prints the result of each lookup technique for a value that is absent
            /// and a value that is present, waits for a key press, then prints the
            /// elapsed milliseconds of every technique for <see cref="RoundCount"/> rounds.
            /// </summary>
            /// <param name="args">Command-line arguments; not used.</param>
            static void Main(string[] args)
            {
                string str0 = "|456|"; // not contained in str2
                string str1 = "|444|"; // contained in str2
                string str2 = "|111|222|333|444|555|666|777|888|999|000|";

                // The patterns never change, so the Regex objects are built once and
                // reused by both the demo and every benchmark round.
                Regex reg0 = new Regex("[|]456[|]");
                Regex reg1 = new Regex("[|]444[|]");

                //------------------------------------------
                // String.Contains
                Console.WriteLine("String.Contains->" + (str2.Contains(str0) ? "true" : "false"));
                Console.WriteLine("String.Contains->" + (str2.Contains(str1) ? "true" : "false"));

                //------------------------------------------
                // String.IndexOf: -1 when absent, otherwise the zero-based position of
                // the first character of the match. StringComparison.Ordinal is passed
                // explicitly because the single-argument overload performs a
                // culture-sensitive search, which is not what Contains does.
                int val1 = str2.IndexOf(str0, StringComparison.Ordinal);
                Console.WriteLine("String.IndexOf(no exists)->" + val1);
                int val2 = str2.IndexOf(str1, StringComparison.Ordinal);
                Console.WriteLine("String.IndexOf(exists)->" + val2);

                //------------------------------------------
                // Regular-expression match
                Console.WriteLine("Regex.Match(no exists)->" + (reg0.Match(str2).Success ? "true" : "false"));
                Console.WriteLine("Regex.Match(exists)->" + (reg1.Match(str2).Success ? "true" : "false"));

                Console.ReadKey();

                // How do the three approaches compare when called a very large number
                // of times? The rounds below repeat the same six measurements.
                for (int round = 1; round <= RoundCount; round++)
                {
                    // Blank separator line followed by the round header. The original
                    // literal was split across two source lines and did not compile.
                    Console.WriteLine();
                    Console.WriteLine("loop " + round + " >>>>>>>");

                    // String.Contains
                    long tsStrConNoExists = MeasureMilliseconds(() => str2.Contains(str0));
                    long tsStrConExists = MeasureMilliseconds(() => str2.Contains(str1));

                    // String.IndexOf; existence is expressed as ">= 0" because -1 means absent.
                    long tsStrIndNoExists = MeasureMilliseconds(() => str2.IndexOf(str0, StringComparison.Ordinal) >= 0);
                    long tsStrIndExists = MeasureMilliseconds(() => str2.IndexOf(str1, StringComparison.Ordinal) >= 0);

                    // Regex.Match
                    long tsStrRegNoExists = MeasureMilliseconds(() => reg0.Match(str2).Success);
                    long tsStrRegExists = MeasureMilliseconds(() => reg1.Match(str2).Success);

                    Console.WriteLine("no exists >>>");
                    Console.WriteLine("tsStrConNoExists = " + tsStrConNoExists);
                    Console.WriteLine("tsStrIndNoExists = " + tsStrIndNoExists);
                    Console.WriteLine("tsStrRegNoExists = " + tsStrRegNoExists);
                    Console.WriteLine("exists >>>");
                    Console.WriteLine("tsStrConExists = " + tsStrConExists);
                    Console.WriteLine("tsStrIndExists = " + tsStrIndExists);
                    Console.WriteLine("tsStrRegExists = " + tsStrRegExists);
                }

                Console.ReadKey();
            }

            /// <summary>
            /// Invokes <paramref name="probe"/> <see cref="LoopCount"/> times and returns
            /// the total elapsed time in whole milliseconds.
            /// </summary>
            /// <remarks>
            /// A Stopwatch is used instead of subtracting DateTime.Now values, and the
            /// total elapsed milliseconds are returned. The original printed
            /// TimeSpan.Milliseconds, which is only the 0-999 millisecond component and
            /// therefore wraps for any measurement longer than one second.
            /// Every probe goes through the same delegate call, so the call overhead is
            /// identical for all techniques being compared.
            /// </remarks>
            /// <param name="probe">The lookup to time; its boolean result is ignored.</param>
            /// <returns>Elapsed milliseconds for all invocations.</returns>
            private static long MeasureMilliseconds(Func<bool> probe)
            {
                // One untimed call so that first-use costs are not charged to the measurement.
                probe();

                Stopwatch timer = Stopwatch.StartNew();
                for (int i = 0; i < LoopCount; i++)
                {
                    probe();
                }
                timer.Stop();

                return timer.ElapsedMilliseconds;
            }
        }
    }

    输出结果:

    String.Contains->false
    String.Contains->true
    String.IndexOf(no exists)->-1
    String.IndexOf(exists)->12
    Regex.Match(no exists)->false
    Regex.Match(exists)->true

    loop 1 >>>>>>>
    no exists >>>
    tsStrConNoExists = 796
    tsStrIndNoExists = 687
    tsStrRegNoExists = 171
    exists >>>
    tsStrConExists = 484
    tsStrIndExists = 234
    tsStrRegExists = 796

    loop 2 >>>>>>>
    no exists >>>
    tsStrConNoExists = 46
    tsStrIndNoExists = 671
    tsStrRegNoExists = 234
    exists >>>
    tsStrConExists = 546
    tsStrIndExists = 437
    tsStrRegExists = 734

    loop 3 >>>>>>>
    no exists >>>
    tsStrConNoExists = 62
    tsStrIndNoExists = 875
    tsStrRegNoExists = 171
    exists >>>
    tsStrConExists = 609
    tsStrIndExists = 562
    tsStrRegExists = 781

    loop 4 >>>>>>>
    no exists >>>
    tsStrConNoExists = 78
    tsStrIndNoExists = 921
    tsStrRegNoExists = 218
    exists >>>
    tsStrConExists = 609
    tsStrIndExists = 640
    tsStrRegExists = 828

    loop 5 >>>>>>>
    no exists >>>
    tsStrConNoExists = 156
    tsStrIndNoExists = 268
    tsStrRegNoExists = 265
    exists >>>
    tsStrConExists = 609
    tsStrIndExists = 578
    tsStrRegExists = 890

    loop 6 >>>>>>>
    no exists >>>
    tsStrConNoExists = 109
    tsStrIndNoExists = 46
    tsStrRegNoExists = 546
    exists >>>
    tsStrConExists = 625
    tsStrIndExists = 609
    tsStrRegExists = 953


    测试结果中不难发现,如果strA中不包括strB,使用strA.Contains(strB)更优;反之,如果strA中包括strB,使用strA.IndexOf(strB)更优。(Regex.Match在此方法中貌似没有体现出任何优势,它更适用于模糊匹配)

    具体要使用string.Contains,或是string.IndexOf要看形势。

    之前有看过string下很多方法实现的代码(微软的,非他人),string.Contains是基于string.IndexOf上的一个方法,使用string.Contains的时候,会调用string.IndexOf,按原理,使用string.IndexOf的效率是要高于string.Contains的,但是这个测试结果让我大跌眼镜,应该是我在上述代码中使用的判断语句造成的这种非理想的测试结果,按照个人的意愿,还是希望多使用string.IndexOf。(注:string.Contains做的是序号(Ordinal)比较,而不带StringComparison参数的string.IndexOf(string)默认按当前区域性比较,两者并不等价,这也会影响耗时对比。)


    其实一次微小的改变在当前可能影响不了什么,但是在日积月累中,它的优势就显而易见了。想要快速变得比他人更强,不需要多么费劲,只需要每天多做一点点(千分之一)  

    一年之后:$(1 + 0.001)^{365} \approx 1.44$ 倍

    十年之后:$(1 + 0.001)^{3650} \approx 38.4$ 倍

  • 相关阅读:
    bzoj1505 [NOI2004]小H的小屋
    最大值
    数学
    OI中的快速傅里叶变换(FFT)
    旅游规划
    加分二叉树
    浅谈 字符串hash
    二分的弟弟“三分”
    Trie树(c++)
    克鲁斯卡尔
  • 原文地址:https://www.cnblogs.com/preacher/p/3931037.html
Copyright © 2011-2022 走看看