zoukankan      html  css  js  c++  java
  • 实践中更高效、实现起来相对简单的基于末尾坏字符原则的BM算法实现

    之前网上看的若干算法,无非两个原则:坏字符原则、好后缀原则。按照算法所述实现了一个版本,但发现其效率还不如本文所述的实现方式。个人分析效率较低的原因可能是因为不断地向前找坏字符或者好后缀来确定跳跃距离导致的,不断的比对操作应该是影响效率的根源。

    下面贴一段实现较简单的方法,感谢之前的领导磊哥,实现参照了他的代码。

    PS:大概看了下ClamAV的BM实现,感觉很复杂。

     1 #define BM_TAB_LEN  (256)
     2 
     3 uint64_t *InitBMTab(const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
     4 {
     5     uint64_t    *pui64RetVal    = NULL;
     6 
     7     if (In_ui8Pattern == NULL || In_ui64PattLen == 0)
     8     {
     9         goto fun_ret;
    10     }
    11 
    12     pui64RetVal = (uint64_t *)malloc(sizeof(uint64_t) * BM_TAB_LEN);
    13     if (pui64RetVal == NULL)
    14     {
    15         goto fun_ret;
    16     }
    17 
    18     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
    19     {
    20         pui64RetVal[i] = In_ui64PattLen;
    21     }
    22 
    23     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
    24     {
    25         pui64RetVal[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
    26     }
    27 
    28 fun_ret:
    29     return pui64RetVal;
    30 }
    31 
    32 int8_t ReBuildBMTab(uint64_t *Out_pui64BMJmpTab, const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
    33 {
    34     int8_t  i8RetVal    = 0;
    35 
    36     if (Out_pui64BMJmpTab == NULL || In_ui8Pattern == NULL || In_ui64PattLen == 0)
    37     {
    38         i8RetVal = -1;
    39         goto fun_ret;
    40     }
    41 
    42     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
    43     {
    44         Out_pui64BMJmpTab[i] = In_ui64PattLen;
    45     }
    46 
    47     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
    48     {
    49         Out_pui64BMJmpTab[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
    50     }
    51 
    52 fun_ret:
    53     return i8RetVal;
    54 }
    55 
    56 void ReleaseBMTab(uint64_t *Out_pui64BMJmpTab)
    57 {
    58     if (Out_pui64BMJmpTab != NULL)
    59     {
    60         free(Out_pui64BMJmpTab);
    61     }
    62 }
    63 
    64 uint64_t BMSearch(const uint64_t *In_pui64BMJmpTab, const uint8_t *In_pui8Pattern, uint64_t In_ui64PattLen,
    65     const uint8_t *In_pui8Buf, uint64_t In_ui64BufLen)
    66 {
    67     uint64_t    ui64RetVal  = -1;
    68     uint64_t    ui64EndIdx  = 0;
    69 
    70     if (In_pui64BMJmpTab == NULL || In_pui8Pattern == NULL
    71         || In_ui64PattLen == 0 || In_pui8Buf == NULL || In_ui64BufLen == 0
    72         || In_ui64BufLen < In_ui64PattLen)
    73     {
    74         goto fun_ret;
    75     }
    76 
    77     ui64EndIdx = In_ui64PattLen - 1;
    78     do 
    79     {
    80         if (In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]] != 0)
    81         {
    82             ui64EndIdx += In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]];
    83             continue;
    84         }
    85         if (memcmp(In_pui8Pattern, In_pui8Buf + ui64EndIdx - In_ui64PattLen + 1, In_ui64PattLen) == 0)
    86         {
    87             ui64RetVal = ui64EndIdx - In_ui64PattLen + 1;
    88             goto fun_ret;
    89         }
    90         ui64EndIdx ++;
    91     } while (ui64EndIdx < In_ui64BufLen);
    92 
    93 fun_ret:
    94     return ui64RetVal;
    95 }
  • 相关阅读:
    面试题汇总--1
    行内元素与块级元素
    前端面试题整理---JS基础
    springmvc处理一个请求的全流程
    反射+动态代理+类加载+JVM
    json注解及序列化
    加密
    Thread setUncaughtExceptionHandler
    RunTime.getRunTime().addShutdownHook 添加钩子
    MySQL日志
  • 原文地址:https://www.cnblogs.com/codeape/p/3270882.html
Copyright © 2011-2022 走看看