LRU算法
很多Cache都支持LRU(Least Recently Used)算法,LRU算法的设计原则是:如果一个数据在最近一段时间没有被访问到,那么在将来它被访问的可能性也很小。也就是说,当限定的空间已存满数据时,应当把最久没有被访问到的数据淘汰。
LRU Cache一般支持两个操作:
- get(key),如果key在cache中,则返回对应的value值,否则返回-1;
- set(key,value),如果key在cache中,则重置value的值;如果key不在cache中,则将该(key,value)插入cache中(注意,如果cache已满,则必须把最近最久未使用的元素从cache中删除);
而用什么数据结构来实现LRU算法呢?最常见的实现是使用一个链表保存缓存数据,如下图:
算法如下:
- 新数据插入到链表头部;
- 每当缓存命中(即缓存数据被访问),则将数据移到链表头部;
- 当链表满的时候,将链表尾部的数据丢弃。
这种链表结构实现简单,但效率不高,每次请求时都需要遍历链表,需要O(N)的复杂度;下面考虑一种更复杂的实现方式。
使用Hash表+双向链表的实现:hash表保证get操作在O(1)时间复杂度完成,双向链表保证增加/删除操作在O(1)时间完成;
实现原理:
get方法:
- 如果key在hash表中不存在,直接返回-1;
- 若存在,则将这个key从双链表移动到头部;
set方法:
- 如果key在hash表中不存在,则将该(key,value)写入hash表,并插入双链表头部;
- 若存在,则将这个key从双链表移动到头部;
一个C++实现版本
#include <algorithm>
#include <cstdint>
#include <list>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <unordered_map>

/*
 * A no-op lockable type that can be used in place of std::mutex
 * when no synchronization is required.
 */
class NullLock {
 public:
  void lock() {}
  void unlock() {}
  bool try_lock() { return true; }
};

/**
 * Error raised when a key not in the cache is passed to get().
 */
class KeyNotFound : public std::invalid_argument {
 public:
  KeyNotFound() : std::invalid_argument("key_not_found") {}
};

/** A key/value node stored in the cache's recency list. */
template <typename K, typename V>
struct KeyValuePair {
 public:
  K key;
  V value;

  KeyValuePair(const K& k, const V& v) : key(k), value(v) {}
};

/**
 * The LRUCache class templated by
 *  Key   - key type
 *  Value - value type
 *  Lock  - a lockable type (default NullLock = no synchronization;
 *          pass std::mutex to make the cache thread-safe)
 *  Map   - an associative container mapping Key to a list iterator,
 *          e.g. std::unordered_map
 *
 * Layout: a doubly linked list (`keys_`) keeps entries in
 * most-recently-used-first order, and a hash map (`cache_`) maps each
 * key to its list node, so get/insert/remove are O(1).
 */
template <class Key, class Value, class Lock = NullLock,
          class Map = std::unordered_map<
              Key, typename std::list<KeyValuePair<Key, Value>>::iterator>>
class LRUCache {
 public:
  typedef KeyValuePair<Key, Value> node_type;
  typedef std::list<KeyValuePair<Key, Value>> list_type;
  typedef Map map_type;
  typedef Lock lock_type;
  using Guard = std::lock_guard<lock_type>;

  /**
   * maxSize is the soft limit of keys and (maxSize + elasticity) is the
   * hard limit: the cache is allowed to grow to the hard limit and is
   * then pruned back to maxSize keys.
   * Set maxSize = 0 for an unbounded cache (but in that case you are
   * better off using a std::unordered_map directly anyway! :)
   */
  explicit LRUCache(size_t maxSize = 1024, size_t elasticity = 10)
      : maxSize_(maxSize), elasticity_(elasticity) {}

  virtual ~LRUCache() = default;

  /** Number of entries currently stored. */
  size_t size() const {
    Guard g(lock_);
    return cache_.size();
  }

  /** True when the cache holds no entries. */
  bool empty() const {
    Guard g(lock_);
    return cache_.empty();
  }

  /** Removes every entry (hit/miss statistics are kept). */
  void clear() {
    Guard g(lock_);
    cache_.clear();
    keys_.clear();
  }

  /**
   * Inserts (k, v). If k is already present its value is overwritten
   * and the entry becomes most-recently-used; otherwise the pair is
   * added at the front and the cache is pruned if it exceeds the
   * hard limit.
   */
  void insert(const Key& k, const Value& v) {
    Guard g(lock_);
    const auto iter = cache_.find(k);
    if (iter != cache_.end()) {
      iter->second->value = v;
      // Move the touched node to the front without invalidating the
      // iterator stored in the map.
      keys_.splice(keys_.begin(), keys_, iter->second);
      return;
    }
    keys_.emplace_front(k, v);
    cache_[k] = keys_.begin();
    prune();
  }

  /**
   * Looks up kIn; on a hit copies the value into vOut, promotes the
   * entry to most-recently-used and returns true. Returns false on a
   * miss (vOut is left untouched).
   */
  bool tryGet(const Key& kIn, Value& vOut) {
    Guard g(lock_);
    const auto iter = cache_.find(kIn);
    if (iter == cache_.end()) {
      miss_count_++;
      return false;
    }
    keys_.splice(keys_.begin(), keys_, iter->second);
    vOut = iter->second->value;
    hit_count_++;
    return true;
  }

  /**
   * Returns a reference to the value for k, promoting the entry to
   * most-recently-used. Throws KeyNotFound on a miss.
   *
   * The const reference returned here is only guaranteed to be valid
   * until the next insert/delete.
   */
  const Value& get(const Key& k) {
    Guard g(lock_);
    const auto iter = cache_.find(k);
    if (iter == cache_.end()) {
      miss_count_++;
      throw KeyNotFound();
    }
    keys_.splice(keys_.begin(), keys_, iter->second);
    hit_count_++;
    return iter->second->value;
  }

  /** Returns a copy of the stored object (throws KeyNotFound on miss). */
  Value getCopy(const Key& k) { return get(k); }

  /** Removes k if present; returns whether an entry was removed. */
  bool remove(const Key& k) {
    Guard g(lock_);
    auto iter = cache_.find(k);
    if (iter == cache_.end()) {
      return false;
    }
    keys_.erase(iter->second);
    cache_.erase(iter);
    return true;
  }

  /** True when k is present. Does NOT affect recency or statistics. */
  bool contains(const Key& k) const {
    Guard g(lock_);
    return cache_.find(k) != cache_.end();
  }

  // The size accessors below take the lock so they stay race-free when
  // Lock = std::mutex (the original read/wrote these fields unlocked).
  void setMaxSize(size_t maxSize) {
    Guard g(lock_);
    maxSize_ = maxSize;
  }
  size_t getMaxSize() const {
    Guard g(lock_);
    return maxSize_;
  }
  size_t getElasticity() const {
    Guard g(lock_);
    return elasticity_;
  }
  size_t getMaxAllowedSize() const {
    Guard g(lock_);
    return maxSize_ + elasticity_;
  }

  /** Number of successful lookups via get()/tryGet(). */
  uint64_t getHitCount() const {
    Guard g(lock_);
    return hit_count_;
  }
  /** Number of failed lookups via get()/tryGet(). */
  uint64_t getMissCount() const {
    Guard g(lock_);
    return miss_count_;
  }

  /** Applies f to every node in MRU-to-LRU order, under the lock. */
  template <typename F>
  void cwalk(F& f) const {
    Guard g(lock_);
    std::for_each(keys_.begin(), keys_.end(), f);
  }

 protected:
  /**
   * Evicts least-recently-used entries until size() == maxSize_, but
   * only once the hard limit (maxSize_ + elasticity_) has been reached;
   * no-op for unbounded caches (maxSize_ == 0). Caller must hold lock_.
   * Returns the number of entries evicted.
   */
  size_t prune() {
    size_t maxAllowed = maxSize_ + elasticity_;
    if (maxSize_ == 0 || cache_.size() < maxAllowed) {
      return 0;
    }
    size_t count = 0;
    while (cache_.size() > maxSize_) {
      cache_.erase(keys_.back().key);
      keys_.pop_back();
      ++count;
    }
    return count;
  }

 private:
  // Disallow copying: the map stores iterators into this object's list,
  // so a memberwise copy would alias the source's nodes.
  LRUCache(const LRUCache&) = delete;
  LRUCache& operator=(const LRUCache&) = delete;

  mutable Lock lock_;     // protects every member below
  Map cache_;             // key -> node in keys_
  list_type keys_;        // MRU at front, LRU at back
  size_t maxSize_;        // soft limit (0 = unbounded)
  size_t elasticity_;     // extra headroom before pruning kicks in
  uint64_t hit_count_ = 0;
  uint64_t miss_count_ = 0;
};
一种改进的有预读的LRU: