zoukankan      html  css  js  c++  java
  • 【redis源码】(八) Intset.c

    intset 实现了一个数字元素的集合。

    intset 使用有序数组存放元素。查找采用二分查找法,时间复杂度为 O(log N);插入和删除需要先二分定位,再移动其后的元素(memmove)并重新分配内存,因此最坏情况下为 O(N)。与其他数据结构类似,作者使用可变编码方式实现对内存的高效利用:初始化的 intset 中的数字定义为 int16_t,即每个元素占用 2 个字节;随着数据的插入,按需把编码升级为 int32_t 或 int64_t。

    上代码

    intset.h

     1 #ifndef __INTSET_H
     2 #define __INTSET_H
     3 #include <stdint.h>
     4 
     5 typedef struct intset {
     6     uint32_t encoding;  /* width in bytes of one element; intset.c stores sizeof(int16_t), sizeof(int32_t) or sizeof(int64_t) here */
     7     uint32_t length;    /* number of elements currently stored in contents */
     8     int8_t contents[];  /* flexible array member: raw payload that intset.c reinterprets as int16_t/int32_t/int64_t[length] according to encoding */
     9 } intset;
    10 
    11 intset *intsetNew(void);                                         /* allocate an empty set (length 0) */
    12 intset *intsetAdd(intset *is, int64_t value, uint8_t *success);  /* insert value; returns the (possibly reallocated) set; *success = 1 if inserted, 0 if already present; success may be NULL */
    13 intset *intsetRemove(intset *is, int64_t value, int *success);   /* remove value; returns the (possibly reallocated) set; *success = 1 if removed, else 0; NOTE(review): flag is int* here but uint8_t* in intsetAdd */
    14 uint8_t intsetFind(intset *is, int64_t value);                   /* 1 if value is a member, 0 otherwise */
    15 int64_t intsetRandom(intset *is);                                /* element at index rand() % length; the set must not be empty */
    16 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);     /* store element pos in *value and return 1, or return 0 when pos >= length */
    17 uint32_t intsetLen(intset *is);                                  /* number of elements */
    18 
    19 #endif // __INTSET_H

    intset.c

      1 #include <stdio.h>
      2 #include <stdlib.h>
      3 #include <string.h>
      4 #include "intset.h"
      5 #include "zmalloc.h"
      6 
      /* An encoding is the width in bytes of one stored element, so the three
       * encodings are naturally ordered:
       * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
      #define INTSET_ENC_INT16 (sizeof(int16_t))
      #define INTSET_ENC_INT32 (sizeof(int32_t))
      #define INTSET_ENC_INT64 (sizeof(int64_t))

      /* Return the narrowest encoding able to represent v, so that a set only
       * pays for wide elements once it actually contains a wide value. */
      static uint8_t _intsetValueEncoding(int64_t v) {
          /* Try the narrowest type first and widen only when v does not fit. */
          if (v >= INT16_MIN && v <= INT16_MAX)
              return INTSET_ENC_INT16;
          if (v >= INT32_MIN && v <= INT32_MAX)
              return INTSET_ENC_INT32;
          return INTSET_ENC_INT64;
      }
     22 
     23 //得到enc编码方式下的第pos个位置的值
     24 /* Return the value at pos, given an encoding. */
     25 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
     26     if (enc == INTSET_ENC_INT64)
     27         return ((int64_t*)is->contents)[pos];
     28     else if (enc == INTSET_ENC_INT32)
     29         return ((int32_t*)is->contents)[pos];
     30     return ((int16_t*)is->contents)[pos];
     31 }
     32 
     33 //得到is中第pos个位置的value
     34 /* Return the value at pos, using the configured encoding. */
     35 static int64_t _intsetGet(intset *is, int pos) {
     36     return _intsetGetEncoded(is,pos,is->encoding);
     37 }
     38 
     39 //在第n个pos个位置存放value
     40 /* Set the value at pos, using the configured encoding. */
     41 static void _intsetSet(intset *is, int pos, int64_t value) {
     42     if (is->encoding == INTSET_ENC_INT64)
     43         ((int64_t*)is->contents)[pos] = value;
     44     else if (is->encoding == INTSET_ENC_INT32)
     45         ((int32_t*)is->contents)[pos] = value;
     46     else
     47         ((int16_t*)is->contents)[pos] = value;
     48 }
     49 
     50 //得到一个新的intset,编码采用INTSET_ENC_INT16
     51 /* Create an empty intset. */
     52 intset *intsetNew(void) {
     53     intset *is = zmalloc(sizeof(intset));
     54     is->encoding = INTSET_ENC_INT16;
     55     is->length = 0;
     56     return is;
     57 }
     58 
     59 /* Resize the intset */
     60 //重新为intset分配内存
     61 static intset *intsetResize(intset *is, uint32_t len) {
     62     uint32_t size = len*is->encoding;
     63     is = zrealloc(is,sizeof(intset)+size);
     64     return is;
     65 }
     66 
     67 
     68 //在排好序的iniset中查找value
     69 //如果找到,返回1,*pos为其所在位置
     70 //如果找不到,返回0,*pos为其能插入的位置
     71 //二分查找法
     72 /* Search for the position of "value". Return 1 when the value was found and
     73  * sets "pos" to the position of the value within the intset. Return 0 when
     74  * the value is not present in the intset and sets "pos" to the position
     75  * where "value" can be inserted. */
     76 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
     77     int min = 0, max = is->length-1, mid = -1;
     78     int64_t cur = -1;
     79 
     80     /* The value can never be found when the set is empty */
     81     if (is->length == 0) {
     82         if (pos) *pos = 0;
     83         return 0;
     84     } else {
     85         /* Check for the case where we know we cannot find the value,
     86          * but do know the insert position. */
     87         //如果插入值大于最后一个元素,或者小于第一个元素,则可以认定无法找到该元素
     88         if (value > _intsetGet(is,is->length-1)) {
     89             if (pos) *pos = is->length;
     90             return 0;
     91         } else if (value < _intsetGet(is,0)) {
     92             if (pos) *pos = 0;
     93             return 0;
     94         }
     95     }
     96 
     97     while(max >= min) {
     98         mid = (min+max)/2;
     99         cur = _intsetGet(is,mid);
    100         if (value > cur) {
    101             min = mid+1;
    102         } else if (value < cur) {
    103             max = mid-1;
    104         } else {
    105             break;
    106         }
    107     }
    108 
    109     if (value == cur) {
    110         if (pos) *pos = mid;
    111         return 1;
    112     } else {
    113         if (pos) *pos = min;
    114         return 0;
    115     }
    116 }
    117 
    118 //升级intset的编码,并且出入一个新数字,因为新数字的绝对值一定大于
    119 //当前intset中所有的数字的绝对值,如果是负数,则最小,放在最前边,否则放在最后边
    120 /* Upgrades the intset to a larger encoding and inserts the given integer. */
    121 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    122     uint8_t curenc = is->encoding;
    123     uint8_t newenc = _intsetValueEncoding(value);
    124     int length = is->length;
    125     int prepend = value < 0 ? 1 : 0;
    126 
    127     /* First set new encoding and resize */
    128     is->encoding = newenc;
    129     is = intsetResize(is,is->length+1);
    130 
    131     /* Upgrade back-to-front so we don't overwrite values.
    132      * Note that the "prepend" variable is used to make sure we have an empty
    133      * space at either the beginning or the end of the intset. */
    134     while(length--)
    135         _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
    136 
    137     /* Set the value at the beginning or the end. */
    138     if (prepend)
    139         _intsetSet(is,0,value);
    140     else
    141         _intsetSet(is,is->length,value);
    142     is->length++;
    143     return is;
    144 }
    145 
    146 //把from开始直到最后intset最后的内容move到to开始的地方,在这之前,应该要resize一下
    147 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
    148     void *src, *dst;
    149     uint32_t bytes = is->length-from;
    150     if (is->encoding == INTSET_ENC_INT64) {
    151         src = (int64_t*)is->contents+from;
    152         dst = (int64_t*)is->contents+to;
    153         bytes *= sizeof(int64_t);
    154     } else if (is->encoding == INTSET_ENC_INT32) {
    155         src = (int32_t*)is->contents+from;
    156         dst = (int32_t*)is->contents+to;
    157         bytes *= sizeof(int32_t);
    158     } else {
    159         src = (int16_t*)is->contents+from;
    160         dst = (int16_t*)is->contents+to;
    161         bytes *= sizeof(int16_t);
    162     }
    163     memmove(dst,src,bytes);
    164 }
    165 
    166 //在intset中插入一个元素,如果返回0,表示已经存在,否则返回1
    167 /* Insert an integer in the intset */
    168 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    169     uint8_t valenc = _intsetValueEncoding(value);
    170     uint32_t pos;
    171     if (success) *success = 1;
    172 
    173     /* Upgrade encoding if necessary. If we need to upgrade, we know that
    174      * this value should be either appended (if > 0) or prepended (if < 0),
    175      * because it lies outside the range of existing values. */
    176     if (valenc > is->encoding) {
    177         /* This always succeeds, so we don't need to curry *success. */
    178         return intsetUpgradeAndAdd(is,value);
    179     } else {
    180         /* Abort if the value is already present in the set.
    181          * This call will populate "pos" with the right position to insert
    182          * the value when it cannot be found. */
    183         if (intsetSearch(is,value,&pos)) {
    184             if (success) *success = 0;
    185             return is;
    186         }
    187 
    188         is = intsetResize(is,is->length+1);
    189         if (pos < is->length) intsetMoveTail(is,pos,pos+1);
    190     }
    191 
    192     _intsetSet(is,pos,value);
    193     is->length++;
    194     return is;
    195 }
    196 
    197 //在intset中删除一个元素value
    198 /* Delete integer from intset */
    199 intset *intsetRemove(intset *is, int64_t value, int *success) {
    200     uint8_t valenc = _intsetValueEncoding(value);
    201     uint32_t pos;
    202     if (success) *success = 0;
    203 
    204     if (valenc <= is->encoding && intsetSearch(is,value,&pos)) {
    205         /* We know we can delete */
    206         if (success) *success = 1;
    207 
    208         /* Overwrite value with tail and update length */
    209         if (pos < (is->length-1)) intsetMoveTail(is,pos+1,pos);
    210         is = intsetResize(is,is->length-1);
    211         is->length--;
    212     }
    213     return is;
    214 }
    215 
    216 //判断value是否在is中
    217 /* Determine whether a value belongs to this set */
    218 uint8_t intsetFind(intset *is, int64_t value) {
    219     uint8_t valenc = _intsetValueEncoding(value);
    220     return valenc <= is->encoding && intsetSearch(is,value,NULL);
    221 }
    222 
    223 //随机取一个intset中的元素
    224 /* Return random member */
    225 int64_t intsetRandom(intset *is) {
    226     return _intsetGet(is,rand()%is->length);
    227 }
    228 
    229 //把pos位置上的元素取出,如果超出位置,返回0,找到了则返回1,*value为其值
    230 /* Sets the value to the value at the given position. When this position is
    231  * out of range the function returns 0, when in range it returns 1. */
    232 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
    233     if (pos < is->length) {
    234         *value = _intsetGet(is,pos);
    235         return 1;
    236     }
    237     return 0;
    238 }
    239 
    240 //得到intset的元素数量
    241 /* Return intset length */
    242 uint32_t intsetLen(intset *is) {
    243     return is->length;
    244 }
    245 
    246 #ifdef INTSET_TEST_MAIN
    247 #include <sys/time.h>
    248 
    249 void intsetRepr(intset *is) {
    250     int i;
    251     for (i = 0; i < is->length; i++) {
    252         printf("%lld\n", (uint64_t)_intsetGet(is,i));
    253     }
    254     printf("\n");
    255 }
    256 
    /* Test helper: print err followed by a newline and terminate the process
     * with exit status 1. */
    void error(char *err) {
        const int status = 1;

        printf("%s\n", err);
        exit(status);
    }
    261 
    /* Test helper: report that the current test section passed. */
    void ok(void) {
        fputs("OK\n", stdout);
    }
    265 
    266 long long usec(void) {
    267     struct timeval tv;
    268     gettimeofday(&tv,NULL);
    269     return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
    270 }
    271 
    /* Minimal assert for the test program: when _e is false, report the
     * failed expression with its location and exit with status 1. */
    #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))

    /* Print the assertion-failure banner; the macro above performs the exit. */
    void _assert(char *estr, char *file, int line) {
        fputs("\n\n=== ASSERTION FAILED ===\n", stdout);
        printf("==> %s:%d '%s' is not true\n",file,line,estr);
    }
    277 
    278 intset *createSet(int bits, int size) {
    279     uint64_t mask = (1<<bits)-1;
    280     uint64_t i, value;
    281     intset *is = intsetNew();
    282 
    283     for (i = 0; i < size; i++) {
    284         if (bits > 32) {
    285             value = (rand()*rand()) & mask;
    286         } else {
    287             value = rand() & mask;
    288         }
    289         is = intsetAdd(is,value,NULL);
    290     }
    291     return is;
    292 }
    293 
    294 void checkConsistency(intset *is) {
    295     int i;
    296 
    297     for (i = 0; i < (is->length-1); i++) {
    298         if (is->encoding == INTSET_ENC_INT16) {
    299             int16_t *i16 = (int16_t*)is->contents;
    300             assert(i16[i] < i16[i+1]);
    301         } else if (is->encoding == INTSET_ENC_INT32) {
    302             int32_t *i32 = (int32_t*)is->contents;
    303             assert(i32[i] < i32[i+1]);
    304         } else {
    305             int64_t *i64 = (int64_t*)is->contents;
    306             assert(i64[i] < i64[i+1]);
    307         }
    308     }
    309 }
    310 
    311 int main(int argc, char **argv) {
    312     uint8_t success;
    313     int i;
    314     intset *is;
    315     sranddev();
    316 
    317     printf("Value encodings: "); {
    318         assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
    319         assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
    320         assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
    321         assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
    322         assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
    323         assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
    324         assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
    325         assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
    326         assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
    327         assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
    328         ok();
    329     }
    330 
    331     printf("Basic adding: "); {
    332         is = intsetNew();
    333         is = intsetAdd(is,5,&success); assert(success);
    334         is = intsetAdd(is,6,&success); assert(success);
    335         is = intsetAdd(is,4,&success); assert(success);
    336         is = intsetAdd(is,4,&success); assert(!success);
    337         ok();
    338     }
    339 
    340     printf("Large number of random adds: "); {
    341         int inserts = 0;
    342         is = intsetNew();
    343         for (i = 0; i < 1024; i++) {
    344             is = intsetAdd(is,rand()%0x800,&success);
    345             if (success) inserts++;
    346         }
    347         assert(is->length == inserts);
    348         checkConsistency(is);
    349         ok();
    350     }
    351 
    352     printf("Upgrade from int16 to int32: "); {
    353         is = intsetNew();
    354         is = intsetAdd(is,32,NULL);
    355         assert(is->encoding == INTSET_ENC_INT16);
    356         is = intsetAdd(is,65535,NULL);
    357         assert(is->encoding == INTSET_ENC_INT32);
    358         assert(intsetFind(is,32));
    359         assert(intsetFind(is,65535));
    360         checkConsistency(is);
    361 
    362         is = intsetNew();
    363         is = intsetAdd(is,32,NULL);
    364         assert(is->encoding == INTSET_ENC_INT16);
    365         is = intsetAdd(is,-65535,NULL);
    366         assert(is->encoding == INTSET_ENC_INT32);
    367         assert(intsetFind(is,32));
    368         assert(intsetFind(is,-65535));
    369         checkConsistency(is);
    370         ok();
    371     }
    372 
    373     printf("Upgrade from int16 to int64: "); {
    374         is = intsetNew();
    375         is = intsetAdd(is,32,NULL);
    376         assert(is->encoding == INTSET_ENC_INT16);
    377         is = intsetAdd(is,4294967295,NULL);
    378         assert(is->encoding == INTSET_ENC_INT64);
    379         assert(intsetFind(is,32));
    380         assert(intsetFind(is,4294967295));
    381         checkConsistency(is);
    382 
    383         is = intsetNew();
    384         is = intsetAdd(is,32,NULL);
    385         assert(is->encoding == INTSET_ENC_INT16);
    386         is = intsetAdd(is,-4294967295,NULL);
    387         assert(is->encoding == INTSET_ENC_INT64);
    388         assert(intsetFind(is,32));
    389         assert(intsetFind(is,-4294967295));
    390         checkConsistency(is);
    391         ok();
    392     }
    393 
    394     printf("Upgrade from int32 to int64: "); {
    395         is = intsetNew();
    396         is = intsetAdd(is,65535,NULL);
    397         assert(is->encoding == INTSET_ENC_INT32);
    398         is = intsetAdd(is,4294967295,NULL);
    399         assert(is->encoding == INTSET_ENC_INT64);
    400         assert(intsetFind(is,65535));
    401         assert(intsetFind(is,4294967295));
    402         checkConsistency(is);
    403 
    404         is = intsetNew();
    405         is = intsetAdd(is,65535,NULL);
    406         assert(is->encoding == INTSET_ENC_INT32);
    407         is = intsetAdd(is,-4294967295,NULL);
    408         assert(is->encoding == INTSET_ENC_INT64);
    409         assert(intsetFind(is,65535));
    410         assert(intsetFind(is,-4294967295));
    411         checkConsistency(is);
    412         ok();
    413     }
    414 
    415     printf("Stress lookups: "); {
    416         long num = 100000, size = 10000;
    417         int i, bits = 20;
    418         long long start;
    419         is = createSet(bits,size);
    420         checkConsistency(is);
    421 
    422         start = usec();
    423         for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
    424         printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
    425     }
    426 
    427     printf("Stress add+delete: "); {
    428         int i, v1, v2;
    429         is = intsetNew();
    430         for (i = 0; i < 0xffff; i++) {
    431             v1 = rand() % 0xfff;
    432             is = intsetAdd(is,v1,NULL);
    433             assert(intsetFind(is,v1));
    434 
    435             v2 = rand() % 0xfff;
    436             is = intsetRemove(is,v2,NULL);
    437             assert(!intsetFind(is,v2));
    438         }
    439         checkConsistency(is);
    440         ok();
    441     }
    442 }
    443 #endif
    喜欢一切简单、实用的东西,拒绝复杂花哨,我不是GEEK.
  • 相关阅读:
    KEIL5.25生成.bin文件步骤
    【转】树莓派网线直连笔记本电脑
    由编译器指定数组长度带来的一个问题
    【转】C/C++位域结构深入解析
    【转】大小端存储模式精解
    【转】树莓派入门之装系统
    【转】树莓派Raspberry Pi
    stm32的双向io口
    小记之while循环条件的操作位置
    【转】浮点数在计算机中存储方式
  • 原文地址:https://www.cnblogs.com/igloo1986/p/2670157.html
Copyright © 2011-2022 走看看