zoukankan      html  css  js  c++  java
  • 实践中更高效、实现起来相对简单的基于末尾坏字符原则的BM算法实现

    之前网上看的若干算法,无非两个原则:坏字符原则、好后缀原则。按照算法所述实现了一个版本,但发现其效率还不如本文所述的实现方式。个人分析效率较低的原因可能是因为不断地向前找坏字符或者好后缀来确定跳跃距离导致的,不断的比对操作应该是影响效率的根源。

    下面贴一段实现较简单的方法,感谢之前的领导磊哥,实现参照了他的代码。

    PS:大概看了下ClamAV的BM实现,感觉很复杂。

     1 #define BM_TAB_LEN  (256)
     2 
     3 uint64_t *InitBMTab(const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
     4 {
     5     uint64_t    *pui64RetVal    = NULL;
     6 
     7     if (In_ui8Pattern == NULL || In_ui64PattLen == 0)
     8     {
     9         goto fun_ret;
    10     }
    11 
    12     pui64RetVal = (uint64_t *)malloc(sizeof(uint64_t) * BM_TAB_LEN);
    13     if (pui64RetVal == NULL)
    14     {
    15         goto fun_ret;
    16     }
    17 
    18     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
    19     {
    20         pui64RetVal[i] = In_ui64PattLen;
    21     }
    22 
    23     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
    24     {
    25         pui64RetVal[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
    26     }
    27 
    28 fun_ret:
    29     return pui64RetVal;
    30 }
    31 
    32 int8_t ReBuildBMTab(uint64_t *Out_pui64BMJmpTab, const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
    33 {
    34     int8_t  i8RetVal    = 0;
    35 
    36     if (Out_pui64BMJmpTab == NULL || In_ui8Pattern == NULL || In_ui64PattLen == 0)
    37     {
    38         i8RetVal = -1;
    39         goto fun_ret;
    40     }
    41 
    42     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
    43     {
    44         Out_pui64BMJmpTab[i] = In_ui64PattLen;
    45     }
    46 
    47     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
    48     {
    49         Out_pui64BMJmpTab[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
    50     }
    51 
    52 fun_ret:
    53     return i8RetVal;
    54 }
    55 
    56 void ReleaseBMTab(uint64_t *Out_pui64BMJmpTab)
    57 {
    58     if (Out_pui64BMJmpTab != NULL)
    59     {
    60         free(Out_pui64BMJmpTab);
    61     }
    62 }
    63 
    64 uint64_t BMSearch(const uint64_t *In_pui64BMJmpTab, const uint8_t *In_pui8Pattern, uint64_t In_ui64PattLen,
    65     const uint8_t *In_pui8Buf, uint64_t In_ui64BufLen)
    66 {
    67     uint64_t    ui64RetVal  = -1;
    68     uint64_t    ui64EndIdx  = 0;
    69 
    70     if (In_pui64BMJmpTab == NULL || In_pui8Pattern == NULL
    71         || In_ui64PattLen == 0 || In_pui8Buf == NULL || In_ui64BufLen == 0
    72         || In_ui64BufLen < In_ui64PattLen)
    73     {
    74         goto fun_ret;
    75     }
    76 
    77     ui64EndIdx = In_ui64PattLen - 1;
    78     do 
    79     {
    80         if (In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]] != 0)
    81         {
    82             ui64EndIdx += In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]];
    83             continue;
    84         }
    85         if (memcmp(In_pui8Pattern, In_pui8Buf + ui64EndIdx - In_ui64PattLen + 1, In_ui64PattLen) == 0)
    86         {
    87             ui64RetVal = ui64EndIdx - In_ui64PattLen + 1;
    88             goto fun_ret;
    89         }
    90         ui64EndIdx ++;
    91     } while (ui64EndIdx < In_ui64BufLen);
    92 
    93 fun_ret:
    94     return ui64RetVal;
    95 }
  • 相关阅读:
    rabbitmq
    mysql
    redis
    vue整理
    crawlspider和中间件
    日志等级与请求传参
    Scrapy框架初级篇
    验证码操作
    图片懒加载、selenium&phantomjs
    《信息安全系统设计基础》 第二周学习总结
  • 原文地址:https://www.cnblogs.com/codeape/p/3270882.html
Copyright © 2011-2022 走看看