zoukankan      html  css  js  c++  java
  • redis 5.0.7 源码阅读——整数集合intset

    redis中整数集合intset相关的文件为:intset.h与intset.c

    intset的所有操作与操作一个排序整形数组 int a[N]类似,只是根据类型做了内存上的优化。

    一、数据结构

    1 typedef struct intset {
    2     uint32_t encoding;
    3     uint32_t length;
    4     int8_t contents[];
    5 } intset;

    intset的数据结构比较简单,使用了一个变长结构体,成员length记录当前成员数量,成员encoding记录当前的int类型,共有以下三种:

    1 #define INTSET_ENC_INT16 (sizeof(int16_t))
    2 #define INTSET_ENC_INT32 (sizeof(int32_t))
    3 #define INTSET_ENC_INT64 (sizeof(int64_t))

    并使用以下方法进行判断类型:

    1 static uint8_t _intsetValueEncoding(int64_t v) {
    2     if (v < INT32_MIN || v > INT32_MAX)
    3         return INTSET_ENC_INT64;
    4     else if (v < INT16_MIN || v > INT16_MAX)
    5         return INTSET_ENC_INT32;
    6     else
    7         return INTSET_ENC_INT16;
    8 }

    intset是已排序好的整数集合,其大致结构如下:

    1 /*
    2 +--------+--------+--------...--------------+
    3 |encoding|length  |contents(encoding*length)|
    4 +--------+--------+--------...--------------+
    5 */

    intset严格按照小端字节序进行存储,不论机器的字节序类型。如果是大端机器,需要进行转换,才进行存储。endianconv.h中有如下定义:

     1 #if (BYTE_ORDER == LITTLE_ENDIAN)
     2 #define memrev16ifbe(p) ((void)(0))
     3 #define memrev32ifbe(p) ((void)(0))
     4 #define memrev64ifbe(p) ((void)(0))
     5 #define intrev16ifbe(v) (v)
     6 #define intrev32ifbe(v) (v)
     7 #define intrev64ifbe(v) (v)
     8 #else
     9 #define memrev16ifbe(p) memrev16(p)
    10 #define memrev32ifbe(p) memrev32(p)
    11 #define memrev64ifbe(p) memrev64(p)
    12 #define intrev16ifbe(v) intrev16(v)
    13 #define intrev32ifbe(v) intrev32(v)
    14 #define intrev64ifbe(v) intrev64(v)
    15 #endif

    具体实现在endianconv.c中,此处略过。

    二、创建

    1 intset *intsetNew(void) {
    2     intset *is = zmalloc(sizeof(intset));
    3     is->encoding = intrev32ifbe(INTSET_ENC_INT16);
    4     is->length = 0;
    5     return is;
    6 }

    刚创建好的intset是空的,默认使用最小的类型。其结构为:

    1 /*此处用一根“-”表示一字节,后同
    2 +----+----+
    3 |  16|   0|
    4 +----+----+
    5 */

    三、 操作

    若有以下intset:

    1 /*
    2 +----+----+--+--+--+--+--+--+--+
    3 |  16|   7| 1| 2| 3| 4| 5| 7| 8|
    4 +----+----+--+--+--+--+--+--+--+
    5           |contents
    6 
    7 */

    现在插入一个数字6,需要调用以下方法:

     1 /* Insert an integer in the intset */
     2 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
     3     uint8_t valenc = _intsetValueEncoding(value);
     4     uint32_t pos;
     5     if (success) *success = 1;
     6 
     7     /* Upgrade encoding if necessary. If we need to upgrade, we know that
     8      * this value should be either appended (if > 0) or prepended (if < 0),
     9      * because it lies outside the range of existing values. */
    10     if (valenc > intrev32ifbe(is->encoding)) {
    11         /* This always succeeds, so we don't need to curry *success. */
    12         return intsetUpgradeAndAdd(is,value);
    13     } else {
    14         /* Abort if the value is already present in the set.
    15          * This call will populate "pos" with the right position to insert
    16          * the value when it cannot be found. */
    17         if (intsetSearch(is,value,&pos)) {
    18             if (success) *success = 0;
    19             return is;
    20         }
    21 
    22         is = intsetResize(is,intrev32ifbe(is->length)+1);
    23         if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
    24     }
    25 
    26     _intsetSet(is,pos,value);
    27     is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    28     return is;
    29 }

    因int16_t足以存储数字“6”,所以新插入数字的int类型与intset一致,然后需要查找插入的pos:

     1 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
     2     int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
     3     int64_t cur = -1;
     4 
     5     /* The value can never be found when the set is empty */
     6     if (intrev32ifbe(is->length) == 0) {
     7         if (pos) *pos = 0;
     8         return 0;
     9     } else {
    10         /* Check for the case where we know we cannot find the value,
    11          * but do know the insert position. */
    12         if (value > _intsetGet(is,max)) {
    13             if (pos) *pos = intrev32ifbe(is->length);
    14             return 0;
    15         } else if (value < _intsetGet(is,0)) {
    16             if (pos) *pos = 0;
    17             return 0;
    18         }
    19     }
    20 
    21     while(max >= min) {
    22         mid = ((unsigned int)min + (unsigned int)max) >> 1;
    23         cur = _intsetGet(is,mid);
    24         if (value > cur) {
    25             min = mid+1;
    26         } else if (value < cur) {
    27             max = mid-1;
    28         } else {
    29             break;
    30         }
    31     }
    32 
    33     if (value == cur) {
    34         if (pos) *pos = mid;
    35         return 1;
    36     } else {
    37         if (pos) *pos = min;
    38         return 0;
    39     }
    40 }

    因intset是已排序好的,所以使用了二分查找。过程如下

     1 /*
     2 find 6
     3         +----+----+--+--+--+--+--+--+--+
     4         |  16|   7| 1| 2| 3| 4| 5| 7| 8|
     5         +----+----+--+--+--+--+--+--+--+
     6 pos               | 0| 1| 2| 3| 4| 5| 6|
     7 step1             |min=0              
     8                                     |max=6
     9                            |mid=(0+6)>>1=3
    10                            |mid_val=4
    11                         
    12 pos               | 0| 1| 2| 3| 4| 5| 6|
    13 step2                         |min=4
    14                                     |max=6
    15                                  |mid=(4+6)>>1=5
    16                                  |mid_val=7
    17 
    18 pos               | 0| 1| 2| 3| 4| 5| 6|
    19 step3                         |min=4
    20                               |max=4
    21                               |mid=(4+4)>>1=5
    22                               |mid_val=5
    23 
    24 pos               | 0| 1| 2| 3| 4| 5| 6|
    25 step4                            |min=5
    26                               |max=4
    27 min>max  break
    28 */

    6在intset中不存在,查找到需要插入到pos=5的位置,此时首先要扩展intset的content:

    1 static intset *intsetResize(intset *is, uint32_t len) {
    2     uint32_t size = len*intrev32ifbe(is->encoding);
    3     is = zrealloc(is,sizeof(intset)+size);
    4     return is;
    5 }

    扩展后:

    1 /*        
    2 +----+----+--+--+--+--+--+--+--+--+
    3 |  16|   7| 1| 2| 3| 4| 5| 7| 8|  |
    4 +----+----+--+--+--+--+--+--+--+--+
    5 pos       | 0| 1| 2| 3| 4| 5| 6| 7|
    6 */

    然后把原来在pos=5及之后的所有的元素向后移一格:

     1 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
     2     void *src, *dst;
     3     uint32_t bytes = intrev32ifbe(is->length)-from;
     4     uint32_t encoding = intrev32ifbe(is->encoding);
     5 
     6     if (encoding == INTSET_ENC_INT64) {
     7         src = (int64_t*)is->contents+from;
     8         dst = (int64_t*)is->contents+to;
     9         bytes *= sizeof(int64_t);
    10     } else if (encoding == INTSET_ENC_INT32) {
    11         src = (int32_t*)is->contents+from;
    12         dst = (int32_t*)is->contents+to;
    13         bytes *= sizeof(int32_t);
    14     } else {
    15         src = (int16_t*)is->contents+from;
    16         dst = (int16_t*)is->contents+to;
    17         bytes *= sizeof(int16_t);
    18     }
    19     memmove(dst,src,bytes);
    20 }

    移动后:

    1 /*        
    2 +----+----+--+--+--+--+--+--+--+--+
    3 |  16|   7| 1| 2| 3| 4| 5| 7| 7| 8|
    4 +----+----+--+--+--+--+--+--+--+--+
    5 pos       | 0| 1| 2| 3| 4| 5| 6| 7|
    6 */

    其使用memmove,并不全修改未覆盖到的内存,所以此时pos=5的值 还是7

    最后修改pos=5的值:

     1 static void _intsetSet(intset *is, int pos, int64_t value) {
     2     uint32_t encoding = intrev32ifbe(is->encoding);
     3 
     4     if (encoding == INTSET_ENC_INT64) {
     5         ((int64_t*)is->contents)[pos] = value;
     6         memrev64ifbe(((int64_t*)is->contents)+pos);
     7     } else if (encoding == INTSET_ENC_INT32) {
     8         ((int32_t*)is->contents)[pos] = value;
     9         memrev32ifbe(((int32_t*)is->contents)+pos);
    10     } else {
    11         ((int16_t*)is->contents)[pos] = value;
    12         memrev16ifbe(((int16_t*)is->contents)+pos);
    13     }
    14 }

    修改后并增加了length:

    1 /*        
    2 +----+----+--+--+--+--+--+--+--+--+
    3 |  16|   8| 1| 2| 3| 4| 5| 6| 7| 8|
    4 +----+----+--+--+--+--+--+--+--+--+
    5 pos       | 0| 1| 2| 3| 4| 5| 6| 7|
    6 */

    如果此时要插入的数字是65536,超出了int16_t所能表示的范围,要先进行扩展int类型操作:

     1 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
     2     uint8_t curenc = intrev32ifbe(is->encoding);
     3     uint8_t newenc = _intsetValueEncoding(value);
     4     int length = intrev32ifbe(is->length);
     5     int prepend = value < 0 ? 1 : 0;
     6 
     7     /* First set new encoding and resize */
     8     is->encoding = intrev32ifbe(newenc);
     9     is = intsetResize(is,intrev32ifbe(is->length)+1);
    10 
    11     /* Upgrade back-to-front so we don't overwrite values.
    12      * Note that the "prepend" variable is used to make sure we have an empty
    13      * space at either the beginning or the end of the intset. */
    14     while(length--)
    15         _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
    16 
    17     /* Set the value at the beginning or the end. */
    18     if (prepend)
    19         _intsetSet(is,0,value);
    20     else
    21         _intsetSet(is,intrev32ifbe(is->length),value);
    22     is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    23     return is;
    24 }

    因其超出原来的int类型所能表示的范围,若为正数,一定是最大的,则应该插入在intset最后,否则应该在最前面。扩展完之后,从后往前将原来的数字,以新的int类型,放置在新的位置上,保证不会有未处理的数字被覆盖,处理完整。

    删除操作:

     1 intset *intsetRemove(intset *is, int64_t value, int *success) {
     2     uint8_t valenc = _intsetValueEncoding(value);
     3     uint32_t pos;
     4     if (success) *success = 0;
     5 
     6     if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
     7         uint32_t len = intrev32ifbe(is->length);
     8 
     9         /* We know we can delete */
    10         if (success) *success = 1;
    11 
    12         /* Overwrite value with tail and update length */
    13         if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
    14         is = intsetResize(is,len-1);
    15         is->length = intrev32ifbe(len-1);
    16     }
    17     return is;
    18 }

    找到指定元素之后,直接把后面的内存移至前面,然后resize。

    redis 5.0.7 下载链接

    http://download.redis.io/releases/redis-5.0.7.tar.gz

    源码阅读顺序参考:

    https://github.com/huangz1990/blog/blob/master/diary/2014/how-to-read-redis-source-code.rst

  • 相关阅读:
    K8s环境搭建
    opencv一些重要的函数或者类
    opencv的点的表示
    opencv矩阵的格式输出
    opencv矩阵运算(二)
    opencv矩阵运算(一)
    如何安装指定版本的Kubernetes
    使用minikube快速部署k8s集群
    Ceph 存储集群
    学习tcpIp必备的抓包工具wireshark
  • 原文地址:https://www.cnblogs.com/chinxi/p/12262901.html
Copyright © 2011-2022 走看看