zoukankan      html  css  js  c++  java
  • 关于strlen

    strlen的实现是通过4个字节4个字节进行枚举,然后通过位运算来判断这4个字节中是否有一个字节含有0,这样的话,效率就提高了4倍。

    这个效率提高是假设a&b&c&d与a&b有差不多效率的前提下。

    那用8字节8字节来偏移的话,是不是更快呢?32位机上不会,64位机上会提高一倍。因为a&b在64位下会提高一倍,因为32位的寄存器大小是32位的,对于分别MOV高位与低位两次。

    本来实验a&b&c&d与a&b的速度的,经实验验证,这两个效率确实是差不多的,然后去看汇编,看指令条数,在没有使用-O优化下,指令的条数差别跟运算符号的个数的倍数相同,就让我感到疑惑了。

    下面附上实验的代码:

    #include <iostream>
    #include <time.h>
    #include <cstdio>
    #include <string>
    using namespace std;
    
    int _strlen(const char *str) {
        const unsigned int *p = (const unsigned int *) str;
        unsigned int low = 0x01010101;
        unsigned int high = 0x80808080;
        while (true) {
            unsigned int d = *p++;
            if (((d - low) & ~d & high) != 0) { // handle [0...256)
            //if (((d - low) & high) != 0) { // handle [0...128)
                break;
            }
        }
        const char *q = (const char *)(p - 1);
        for (int i = 0; i < (int)sizeof(unsigned int); i++) {
            if (q[i] == 0) {
                return q - str + i;
            }
        }
        return -1;
    }
    
    int _strlen2(const char *str) {
        const char *p = str;
        while (*p != 0) {
            p++;
        }
        return p - str;
    }
    
    int _strlen3(const char *str) {
        const unsigned long long *p = (const unsigned long long *) str;
        unsigned long long low = 0x0101010101010101;
        unsigned long long high = 0x8080808080808080;
        while (true) {
            unsigned long long d = *p++;
            if (((d - low) & ~d & high) != 0) { // handle [0...256)
            //if (((d - low) & high) != 0) { // handle [0...128)
                break;
            }
        }
        const char *q = (const char *)(p - 1);
        for (int i = 0; i < (int)sizeof(unsigned long long); i++) {
            if (q[i] == 0) {
                return q - str + i;
            }
        }
        return -1;
    }
    
    size_t _strlen4(const char *str) 
    {
      const char *char_ptr;
      const unsigned long int *longword_ptr;
      unsigned long int longword, himagic, lomagic;
    
      /* Handle the first few characters by reading one character at a time.
         Do this until CHAR_PTR is aligned on a longword boundary.  */
      for (char_ptr = str; ((unsigned long int) char_ptr
                & (sizeof (longword) - 1)) != 0;
           ++char_ptr)
        if (*char_ptr == '')
          return char_ptr - str;
    
      /* All these elucidatory comments refer to 4-byte longwords,
         but the theory applies equally well to 8-byte longwords.  */
    
      longword_ptr = (unsigned long int *) char_ptr;
    
      /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
         the "holes."  Note that there is a hole just to the left of
         each byte, with an extra at the end:
    
         bits:  01111110 11111110 11111110 11111111
         bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
    
         The 1-bits make sure that carries propagate to the next 0-bit.
         The 0-bits provide holes for carries to fall into.  */
      himagic = 0x80808080L;
      lomagic = 0x01010101L;
      if (sizeof (longword) > 4)
        {
          /* 64-bit version of the magic.  */
          /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
          himagic = ((himagic << 16) << 16) | himagic;
          lomagic = ((lomagic << 16) << 16) | lomagic;
        }
        /*j
      if (sizeof (longword) > 8)
        abort ();
        */
    
      /* Instead of the traditional loop which tests each character,
         we will test a longword at a time.  The tricky part is testing
         if *any of the four* bytes in the longword in question are zero.  */
      for (;;)
        {
          longword = *longword_ptr++;
    
          if (((longword - lomagic) & ~longword & himagic) != 0)
        {
          /* Which of the bytes was the zero?  If none of them were, it was
             a misfire; continue the search.  */
    
          const char *cp = (const char *) (longword_ptr - 1);
    
          if (cp[0] == 0)
            return cp - str;
          if (cp[1] == 0)
            return cp - str + 1;
          if (cp[2] == 0)
            return cp - str + 2;
          if (cp[3] == 0)
            return cp - str + 3;
          if (sizeof (longword) > 4)
            {
              if (cp[4] == 0)
            return cp - str + 4;
              if (cp[5] == 0)
            return cp - str + 5;
              if (cp[6] == 0)
            return cp - str + 6;
              if (cp[7] == 0)
            return cp - str + 7;
            }
        }
        }
    }
    
    string gen_data() {
        string a;
        for (int i = 0; i < 100000; i++) {
            a.push_back('a');
        }
        return a;
    }
    
    double get_run_time(int(*fp)(const char *), const char *str, int count) {
        clock_t start = clock();
        for (int i = 0; i < count; i++) {
            fp(str);
        }
        clock_t end = clock();
        return (double)(end - start) / CLOCKS_PER_SEC;
    }
    
    double get_run_time(size_t(*fp)(const char *), const char *str, int count) {
        clock_t start = clock();
        for (int i = 0; i < count; i++) {
            fp(str);
        }
        clock_t end = clock();
        return (double)(end - start) / CLOCKS_PER_SEC;
    }
    
    int main() {
        string a = gen_data(); 
        printf("%d
    ", _strlen(a.c_str()));
        printf("%d
    ", _strlen2(a.c_str()));
        printf("%d
    ", _strlen3(a.c_str()));
        printf("%d
    ", (int)strlen(a.c_str()));
        double time = get_run_time(&_strlen, a.c_str(), 10000);
        printf("%f
    ", time);
        double time2 = get_run_time(&_strlen2, a.c_str(), 10000);
        printf("%f
    ", time2);
        double time3 = get_run_time(&_strlen3, a.c_str(), 10000);
        printf("%f
    ", time3);
        double time4 = get_run_time(&strlen, a.c_str(), 10000);
        printf("%f
    ", time4);
        double time5 = get_run_time(&_strlen4, a.c_str(), 10000);
        printf("%f
    ", time5);
    }
  • 相关阅读:
    Container With Most Water(LintCode)
    Single Number III(LintCode)
    Single Number II(LintCode)
    Spiral Matrix(LintCode)
    Continuous Subarray Sum II(LintCode)
    kubernetes外部访问的几种方式
    kubernetes 数据持久化
    kubernetes deployment
    kubernetes service访问原理
    kubernetes namespace
  • 原文地址:https://www.cnblogs.com/litstrong/p/3329194.html
Copyright © 2011-2022 走看看