zoukankan      html  css  js  c++  java
  • 基于哈希表实现页面置换算法

    首先,在看这篇文章前,你需要了解缓存是干嘛的?

    缓存

        众所周知,程序运行时,数据一般存在内存或磁盘里,而内存中的数据总是可以被更快速的获取。但是内存空间是有限的,大多数人PC的内存可能在4G~16G之间,这意味着你必须要舍弃一部分不频繁被访问的数据,把它们存在磁盘里;而把经常需要被访问的数据存在内存里,这就是缓存的基本思路。

        但对于程序(和你)而言,无法预测哪些数据是被经常访问的,所以你只能根据访问数据的历史来推测和统计哪些数据是被经常访问的,并把它存在内存里,如果内存满了就找个最不被经常访问的数据替换掉。这个统计过程和改写内存的策略通常被称为“页面置换算法”,事实上,所有可实现的缓存策略(页面置换算法)都是基于历史访问信息来实现的。科学家经过几十年的努力,发明了许许多多的页面置换算法,比如:FIFO、LFU、LRU、ARC、MQ、GD、GDSF....,它们各有所长,没有孰优孰劣。

        缺页数(率):为了评判页面置换算法的优劣,针对访问数据的次数n,和数据未被缓存命中的次数(缺页数)p,缺页率=p/n。显然,缺页率越小,页面置换算法越优秀。

     

    正事

        本文基于哈希表的内存管理结构,简单地实现线程安全的缓存算法(LFU, LRU, ARC, MQ, GD, GDSF):

      首先看API:(cache.h)

     1 #pragma once
     2 
     3 #include <sys/time.h>
     4 #ifdef __cplusplus
     5 extern "C" {
     6 #endif
     7 
     8 typedef struct _cache *cache_t, _cache;
     9 typedef struct _cache_ele cache_pair;
    10 typedef struct _cache_ret { /// 读缓存返回结果
    11     long cost;        /// 代价
    12     const char*cache; /// 数据
    13 }cache_ret;
    14 /**
    15  * @API
    16  * create, delete, search, read
    17  */
    18 cache_t   new_cache        (unsigned capacity, cache_ret(*model)(cache_t, const char*)); /// 新建
    19 void      del_cache        (cache_t cache);                                              /// 删除
    20 unsigned  cache_page_fault (cache_t cache);                                              /// 缺页数
    21 cache_ret read_cache       (cache_t cache, const char*filename);                         /// 读缓存
    22 
    23 /**
    24  * @Cache_Algorithm_Model
    25  * cache_ret(*)(cache_t, const char*)
    26  */
    27 cache_ret LRU (cache_t cache, const char*request); /// 页面替换算法模型
    28 cache_ret LFU (cache_t cache, const char*request);
    29 cache_ret ARC (cache_t cache, const char*request);
    30 cache_ret MQ  (cache_t cache, const char*request);
    31 cache_ret GD  (cache_t cache, const char*request);
    32 cache_ret GDSF(cache_t cache, const char*request);
    33 
    34 #ifdef __cplusplus
    35 }
    36 #endif

        数据结构:

        缓存:  

     1 struct _cache_ele {                   /// 数据单元
     2     char *key, *file_cache;           /// 键值、数据 
     3     long cost;                        /// 代价(长度)
     4     struct timeval pre_t;             /// 上次访问时间
     5     unsigned int cnt;                 /// 访问次数
     6     struct _cache_ele *nxt, *pre;
     7 };
     8 
     9 struct _cache {
    10     cache_pair table[HASHTABELSIZE], *first_table;      /// 哈希表,first_table根据需要生成
    11     cache_ret (*f)(cache_t, const char *);              /// 页面置换策略
    12     pthread_mutex_t mutex;                              /// 线程安全的锁
    13     unsigned int capacity, _cur, first_cur, page_fault; /// 容量、当前量、ft当前量、缺页数
    14 };/// *cache_t

         缓存策略实现:

         缓存策略实际上是一个选择问题,如果缓存没有满,那么显然可以直接把新请求的数据直接读到缓存中,如果满了,则按照策略选一个数据替换掉。

         (cache.c完整代码)

      1 #include "cache.h"
      2 #include <zconf.h>
      3 #include "stdlib.h"
      4 #include <sys/mman.h>
      5 #include "pthread.h"
      6 #include "string.h"
      7 #include <sys/time.h>
      8 #include <fcntl.h>
      9 
     10 #define RMALLOC(type,n) (type*)malloc(sizeof(type)*(n))
     11 #define MALLOC(p,type,n) type*p = RMALLOC(type, n)
     12 #define MAX_BUFFER_LEN 1024ll * 1024
     13 #ifndef HASHTABLESZIE
     14 #define HASHTABELSIZE 10005
     15 #endif
     16 
     17 unsigned int string_hash(const char *str) {
     18     unsigned int hash = 0;
     19     int i;
     20     for (i = 0; *str; i++) {
     21         if ((i & 1) == 0)hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3));
     22         else hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5)));
     23     }
     24     return (hash & 0x7FFFFFFF);
     25 }
     26 
     27 struct _cache_ele {
     28     char *key, *file_cache;
     29     long cost;
     30     struct timeval pre_t;
     31     unsigned int cnt;
     32     struct _cache_ele *nxt, *pre;
     33 };
     34 
     35 cache_pair*new_cache_pair(){
     36     MALLOC(res, cache_pair, 1);
     37     res->key = res->file_cache = NULL;
     38     res->cnt = res->cost = 0;
     39     res->nxt = res->pre = NULL;
     40     return res;
     41 }
     42 
     43 void del_cache_pair(cache_pair *del) {
     44     free(del->key);
     45     free(del->file_cache);
     46     free(del);
     47 }
     48 
     49 struct _cache {
     50     cache_pair table[HASHTABELSIZE], *first_table;      /// hash table
     51     cache_ret (*f)(cache_t, const char *);              /// function pointer
     52     pthread_mutex_t mutex;
     53     unsigned int capacity, _cur, first_cur, page_fault;
     54 };/// *cache_t
     55 
     56 cache_t new_cache(unsigned capacity, cache_ret(*model)(cache_t, const char *)) {
     57     if (model) {
     58         MALLOC(ret, _cache, 1);
     59         pthread_mutex_init(&ret->mutex, NULL);
     60         ret->capacity = capacity;
     61         ret->page_fault = ret->first_cur = ret->_cur = 0;
     62         memset(ret->table, 0, sizeof(cache_pair) * HASHTABELSIZE);
     63         if (model == ARC)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE);
     64         else if(model == MQ)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE * 3);
     65         else ret->first_table = NULL;
     66         ret->f = model;
     67         return ret;
     68     }
     69     return NULL;
     70 }
     71 
     72 cache_ret read_cache(cache_t cache, const char *filename) {
     73     pthread_mutex_lock(&cache->mutex);
     74     cache_ret res = cache->f(cache, filename);
     75     pthread_mutex_unlock(&cache->mutex);
     76     return res;
     77 }
     78 
     79 unsigned cache_page_fault(cache_t cache){
     80     return cache->page_fault;
     81 }
     82 
     83 void del_cache(cache_t cache) {
     84     pthread_mutex_destroy(&cache->mutex);
     85     for (int i = 0; i < HASHTABELSIZE; ++i) {
     86         cache_pair *p = cache->table[i].nxt;
     87         while (p) {
     88             cache_pair *tmp = p;
     89             p = p->nxt;
     90             del_cache_pair(tmp);
     91         }
     92     }
     93     if (cache->first_table) {
     94         for (int i = 0; i < HASHTABELSIZE; ++i) {
     95             cache_pair *p = cache->first_table[i].nxt;
     96             while (p) {
     97                 cache_pair *tmp = p;
     98                 p = p->nxt;
     99                 del_cache_pair(tmp);
    100             }
    101         }
    102         free(cache->first_table);
    103     }
    104     free(cache);
    105 }
    106 
    107 cache_pair *is_in_table(cache_pair *table, const char *request, int *ret) {
    108     unsigned int index = string_hash(request) % HASHTABELSIZE;
    109     cache_pair *src = table + index;
    110     if (!src->nxt) {
    111         *ret = 0;
    112         return src;
    113     }
    114     src = src->nxt;
    115     while (strcmp(src->key, request)) {
    116         cache_pair *pre = src;
    117         src = src->nxt;
    118         if (!src) { /// not in table: return pre node
    119             *ret = 0;
    120             return pre;
    121         }
    122     }
    123     *ret = 1;
    124     return src;
    125 }
    126 
    127 void replace_after_src(cache_pair *src, const char *request) {
    128     src = src->nxt;
    129     src->cnt = 1;
    130     gettimeofday(&src->pre_t, NULL);
    131     src->key = src->key ? (char *) realloc(src->key, strlen(request) + 1) : RMALLOC(char, strlen(request) + 1);
    132     strcpy(src->key, request);
    133     int fd = open(request, O_RDONLY);
    134     if (fd > 0) {
    135         char *fp = mmap(NULL, MAX_BUFFER_LEN, PROT_READ, MAP_SHARED, fd, 0);
    136         src->cost = strlen(fp) + 1;
    137         src->file_cache = src->file_cache ? (char *) realloc(src->file_cache, src->cost) : RMALLOC(char, src->cost);
    138         strcpy(src->file_cache, fp);
    139         munmap(fp, MAX_BUFFER_LEN);
    140         close(fd);
    141     } else {
    142         src->cost = -1;
    143         if (src->file_cache)free(src->file_cache);
    144         src->file_cache = NULL;
    145     }
    146 }
    147 
    148 void add_after_src(cache_pair *src, const char *request) {
    149     src->nxt = new_cache_pair();
    150     src->nxt->pre = src;
    151     replace_after_src(src, request);
    152 }
    153 
    154 void replace_copy(cache_pair *src, cache_pair *aim) {
    155     src = src->nxt;
    156     src->cnt = aim->cnt;
    157     gettimeofday(&src->pre_t, NULL);
    158     src->cost = aim->cost;
    159     free(src->key);
    160     free(src->file_cache);
    161     src->key = aim->key;
    162     src->file_cache = aim->file_cache;
    163     aim->pre->nxt = aim->nxt;
    164     free(aim);
    165 }
    166 
    167 void add_copy(cache_pair *src, cache_pair *aim) {
    168     src->nxt = new_cache_pair();
    169     src->nxt->pre = src;
    170     replace_copy(src, aim);
    171 }
    172 
    173 cache_pair *LRU_CHOOSE(cache_pair *table) {
    174     double mn = -1;
    175     cache_pair *res = NULL;
    176     for (int i = 0; i < HASHTABELSIZE; ++i)
    177         if (table[i].nxt) {
    178             cache_pair *ptr = table + i;
    179             while (ptr->nxt) {
    180                 cache_pair *pre = ptr;
    181                 ptr = ptr->nxt;
    182                 double cur = ptr->pre_t.tv_sec * 1000.0 + ptr->pre_t.tv_usec / 1000.0;
    183                 if (mn < 0 || cur < mn) {
    184                     mn = cur;
    185                     res = pre;
    186                 }
    187             }
    188         }
    189     return res;
    190 }
    191 
    192 cache_pair *LFU_CHOOSE(cache_pair *table) {
    193     int mn = -1;
    194     cache_pair *res = NULL;
    195     for (int i = 0; i < HASHTABELSIZE; ++i)
    196         if (table[i].nxt) {
    197             cache_pair *ptr = table + i;
    198             while (ptr->nxt) {
    199                 cache_pair *pre = ptr;
    200                 ptr = ptr->nxt;
    201                 int cur = ptr->cnt;
    202                 if (mn < 0 || cur < mn) {
    203                     mn = cur;
    204                     res = pre;
    205                 }
    206             }
    207         }
    208     return res;
    209 }
    210 
    211 cache_pair *GD_CHOOSE(cache_pair *table) {
    212     double mn = -1;
    213     cache_pair *res = NULL;
    214     for (int i = 0; i < HASHTABELSIZE; ++i)
    215         if (table[i].nxt) {
    216             cache_pair *ptr = table + i;
    217             while (ptr->nxt) {
    218                 cache_pair *pre = ptr;
    219                 ptr = ptr->nxt;
    220                 double cur = ptr->cost + ptr->pre_t.tv_sec;
    221                 if (mn < 0 || cur < mn) {
    222                     mn = cur;
    223                     res = pre;
    224                 }
    225             }
    226         }
    227     return res;
    228 }
    229 
    230 cache_pair *GDSF_CHOOSE(cache_pair *table) {
    231     double mn = -1;
    232     cache_pair *res = NULL;
    233     for (int i = 0; i < HASHTABELSIZE; ++i)
    234         if (table[i].nxt) {
    235             cache_pair *ptr = table + i;
    236             while (ptr->nxt) {
    237                 cache_pair *pre = ptr;
    238                 ptr = ptr->nxt;
    239                 double cur = ptr->cnt * ptr->cost + ptr->pre_t.tv_sec;
    240                 if (mn < 0 || cur < mn) {
    241                     mn = cur;
    242                     res = pre;
    243                 }
    244             }
    245         }
    246     return res;
    247 }
    248 
    249 cache_ret LRU(cache_t set, const char *request) {
    250     int flag;
    251     cache_pair *src = is_in_table(set->table, request, &flag);
    252     if (flag) { /// real node
    253         src->cnt++;
    254         gettimeofday(&src->pre_t, NULL);
    255     } else { /// pre node
    256         ++set->page_fault;
    257         if (set->_cur == set->capacity) { /// choose and replace
    258             src = LRU_CHOOSE(set->table);
    259             replace_after_src(src, request);
    260         } else { /// add node
    261             add_after_src(src, request);
    262             ++set->_cur;
    263         }
    264         src = src->nxt;
    265     }
    266     return (cache_ret) {src->cost, src->file_cache};
    267 }
    268 
    269 cache_ret LFU(cache_t set, const char *request) {
    270     int flag;
    271     cache_pair *src = is_in_table(set->table, request, &flag);
    272     if (flag) {
    273         src->cnt++;
    274         gettimeofday(&src->pre_t, NULL);
    275     } else {
    276         ++set->page_fault;
    277         if (set->_cur == set->capacity) {
    278             src = LFU_CHOOSE(set->table);
    279             replace_after_src(src, request);
    280         } else {
    281             add_after_src(src, request);
    282             ++set->_cur;
    283         }
    284         src = src->nxt;
    285     }
    286     return (cache_ret) {src->cost, src->file_cache};
    287 }
    288 
    289 cache_ret ARC(cache_t set, const char *request) {
    290     int flag;
    291     cache_pair *first_table = set->first_table;
    292     cache_pair *src = is_in_table(set->table, request, &flag);
    293     if (flag) { /// in second table
    294         src->cnt++;
    295         gettimeofday(&src->pre_t, NULL);
    296     } else {
    297         cache_pair *first_src = is_in_table(first_table, request, &flag);
    298         if (flag) { /// in first table
    299             ++first_src->cnt;
    300             if (set->_cur == set->capacity) { /// choose and replace
    301                 src = LRU_CHOOSE(set->table);
    302                 replace_copy(src, first_src); /// copy data to nxt src and delete first_src
    303             } else { /// add node
    304                 add_copy(src, first_src); /// create node and replace
    305                 ++set->_cur;
    306             }
    307             src = src->nxt;
    308         } else { /// not in first table
    309             ++set->page_fault;
    310             if (set->first_cur == set->capacity) {
    311                 first_src = LRU_CHOOSE(first_table);
    312                 replace_after_src(first_src, request);
    313             } else {
    314                 add_after_src(first_src, request);
    315                 ++set->first_cur;
    316             }
    317             src = first_src->nxt;
    318         }
    319     }
    320     return (cache_ret) {src->cost, src->file_cache};
    321 }
    322 
    323 cache_ret MQ(cache_t set, const char *request) {
    324     int flag;
    325     cache_pair *first_table = set->first_table;
    326     cache_pair *src = is_in_table(set->table, request, &flag);
    327     if (flag) { /// in second table
    328         src->cnt++;
    329         gettimeofday(&src->pre_t, NULL);
    330     } else {
    331         cache_pair *first_src = is_in_table(first_table, request, &flag);
    332         if (flag) { /// in first table
    333             ++first_src->cnt;
    334             if (set->_cur == set->capacity) { /// choose and replace
    335                 src = LRU_CHOOSE(set->table);
    336                 replace_copy(src, first_src); /// copy data to nxt src and delete first_src
    337             } else { /// add node
    338                 add_copy(src, first_src); /// create node and replace
    339                 ++set->_cur;
    340             }
    341             src = src->nxt;
    342         } else { /// not in first table
    343             ++set->page_fault;
    344             if (set->first_cur == set->capacity) {
    345                 first_src = LRU_CHOOSE(first_table);
    346                 replace_after_src(first_src, request);
    347             } else {
    348                 add_after_src(first_src, request);
    349                 ++set->first_cur;
    350             }
    351             src = first_src->nxt;
    352         }
    353     }
    354     return (cache_ret) {src->cost, src->file_cache};
    355 }
    356 
    357 cache_ret GD(cache_t set, const char *request) {
    358     int flag;
    359     cache_pair *src = is_in_table(set->table, request, &flag);
    360     if (flag) {
    361         src->cnt++;
    362         gettimeofday(&src->pre_t, NULL);
    363     } else {
    364         ++set->page_fault;
    365         if (set->_cur == set->capacity) {
    366             src = GD_CHOOSE(set->table);
    367             replace_after_src(src, request);
    368         } else {
    369             add_after_src(src, request);
    370             ++set->_cur;
    371         }
    372         src = src->nxt;
    373     }
    374     return (cache_ret) {src->cost, src->file_cache};
    375 }
    376 
    377 cache_ret GDSF(cache_t set, const char *request) {
    378     int flag;
    379     cache_pair *src = is_in_table(set->table, request, &flag);
    380     if (flag) {
    381         src->cnt++;
    382         gettimeofday(&src->pre_t, NULL);
    383     } else {
    384         ++set->page_fault;
    385         if (set->_cur == set->capacity) {
    386             src = GDSF_CHOOSE(set->table);
    387             replace_after_src(src, request);
    388         } else {
    389             add_after_src(src, request);
    390             ++set->_cur;
    391         }
    392         src = src->nxt;
    393     }
    394     return (cache_ret) {src->cost, src->file_cache};
    395 }
    View Code

    ---版权:代码遵从MIT协议开源,可以按需使用。(地址:cache.c

    ---转载需标明出处,侵权必究

    ---作者:RhythmLian: https://github.com/Rhythmicc

  • 相关阅读:
    大数据面经
    mysql复习(2)
    java容器
    内存管理
    垃圾收集
    输入/输出流
    排序算法的稳定性及其汇总
    java传值与传引用
    linux复习6
    linux复习5
  • 原文地址:https://www.cnblogs.com/Rhythm-/p/12106402.html
Copyright © 2011-2022 走看看