zoukankan      html  css  js  c++  java
  • 一致性Hash简单介绍和使用

    背景:
    一致性Hash用于分布式缓存系统,将Key值映射到详细机器Ip上,而且添加和删除1台机器的数据移动量较小,对现网影响较小

    实现:
    1 Hash环:将节点的Hash值映射到一个Hash环中。每一个Key顺时针第一个找到的节点。就是这个Key被路由到的机器
    2 "虚拟节点":将节点虚拟成多个"虚拟节点"分布在Hash环上,使得分布更均匀。扩缩容影响较小

    代码实例:

    /*
     * @ 一致性Hash模拟測试
     * @ 结论:模拟4台机器扩容1台。遍历Key[0,999983]
    		- 一致性Hash需移动181161个Key,约占18%(1/5左右,符合预期效果)
    		- 取模Hash需移动799984个Key,约占80%
     * @ 2014.05.30 
     */
    
    #include <stdint.h>
    #include <iostream>
    #include <string.h>
    #include <sstream>
    #include <map>
    #include <vector>
    using namespace std;
    
    #define HASH_MOD (999983) 
    
    template <class T> 
    string ToStr(const T &t)
    {
    	stringstream stream;
    	stream << t;
    	return stream.str();
    }
    
    uint32_t APHash(string &sKey)
    {
    	char *key = (char*)sKey.c_str();
    	unsigned int hash = 0;
    	for (int i=0; *key; i++)
    	{
    		if ((i & 1) == 0) {
    			hash ^= ((hash<<7)^(*key++)^(hash>>3));
    		} else {
    			hash ^= (~((hash<<11)^(*key++)^(hash>>5)));
    		}
    	}
    	return hash%HASH_MOD;
    }
    
    class CMyConHash
    {
    public:
    	/* 加入一台机器(IP) */
    	void AddIp(const string &sIp) 
    	{
    		// 每一个IP分配128个虚拟节点,原因:结合APHash实验结果分布较均匀
    		for (int i = 0; i < 128; i ++)
    		{
    			string sCode = sIp + ToStr(i) + "#Hash";
    			uint32_t uVirKey = APHash(sCode);
    			mapVirKey2Ip[uVirKey] = sIp;
    			mapIp2VirKey[sIp].push_back(uVirKey);
    		}
    	}
    	
    	/* 删除一台机器(IP) */
    	void DelIp(const string &sIp) 
    	{
    		if (mapIp2VirKey.count(sIp) == 0) {	
    			cout << "DelIp Err: mapIp2VirKey Don`t Has Ip=" << sIp << endl;
    			return;
    		}
    		vector<uint32_t> vecVirKey = mapIp2VirKey[sIp];
    		for (int i = 0; i < vecVirKey.size(); i ++)
    		{
    			uint32_t uVirKey = vecVirKey[i];
    			if (mapVirKey2Ip[uVirKey] == sIp) {
    				// 得推断下。有可能2个IP虚拟节点相等后覆盖了
    				mapVirKey2Ip.erase(uVirKey);
    			}
    		}
    		mapIp2VirKey.erase(sIp);
    	}
    	
    	/* 路由:给每一个Key找到负责的机器(IP) */
    	int FindIp(uint32_t uKey, string &sIp)
    	{
    		if (mapVirKey2Ip.size() == 0) {
    			cout << "FindIp Err: mapVirKey2Ip.size() == 0" << endl;
    			return -1;
    		}
    		bool bFind = false;
    		uint32_t uVirKey;
    		map<uint32_t, string>::iterator iter;
    		// 遍历std::map是按Key大小顺序输出(差别std::tr1::unordered_map)
    		for(iter = mapVirKey2Ip.begin(); iter != mapVirKey2Ip.end(); iter ++)
    		{
    			uVirKey = iter->first;
    			if (uVirKey > uKey%HASH_MOD) {
    				sIp = iter->second; 
    				bFind = true;
    				break;
    			}
    		}
    		if (!bFind) {
    			// 找不到比Key小的虚拟节点,故使用最小的虚拟节点(环)
    			iter = mapVirKey2Ip.begin();
    			uVirKey = iter->first;
    			sIp = iter->second; 
    		}
    		//cout << "FindIp Suc:" << uKey%HASH_MOD << "=>" << uVirKey << "," << sIp << endl;
    		return 0;
    	}
    	
    	/* 打印各个IP负责的Key区域大小。影响因素:1 Hash函数 2 虚拟节点个数 */
    	/* 4台机器的情况,相对还是较均匀:
    		Ip=202.168.14.241,Cnt=251649
    		Ip=202.168.14.242,Cnt=257902
    		Ip=202.168.14.243,Cnt=245945
    		Ip=202.168.14.244,Cnt=235516 */
    	void EchoIpState()
    	{
    		map<string, uint32_t> mapIpCnt; 
    		map<uint32_t, string>::iterator iter = mapVirKey2Ip.end(); 
    		iter --;
    		uint32_t uPreKey = iter->first;
    		string sPreIp = iter->second;
    		do {
    			iter --;
    			uint32_t uVirKey = iter->first;
    			string sIp = iter->second;
    			if (mapIpCnt.count(sPreIp) == 0) {
    				mapIpCnt[sPreIp] = uPreKey-uVirKey;
    			} else {
    				mapIpCnt[sPreIp] += uPreKey-uVirKey;
    			} 
    			uPreKey = uVirKey;
    			sPreIp = sIp;
    		} while (iter != mapVirKey2Ip.begin()); 
    		
    		cout << "Ip Size=" << mapIpCnt.size() << endl;
    		map<string, uint32_t>::iterator iter1;
    		for(iter1 = mapIpCnt.begin(); iter1 != mapIpCnt.end(); iter1 ++)
    		{
    			cout << "Ip=" << iter1->first << ",Cnt=" << iter1->second << endl;
    		}
    	}
    private:
    	map< uint32_t, string > mapVirKey2Ip;
    	map< string, vector<uint32_t> > mapIp2VirKey; 
    };
    
    class CMyModHash
    {
    public: 
    	void AddIp(const string &sIp) 
    	{
    		vecIpList.push_back(sIp);
    	}
    	void FindIp(uint32_t uKey, string &sIp)
    	{
    		sIp = vecIpList[uKey%vecIpList.size()];
    	}
    	void EchoIpState()
    	{
    		cout << "Ip Cnt=" << vecIpList.size() << endl;
    	}
    private: 
    	vector<string> vecIpList;
    };
    
    int main()
    {
    	CMyConHash oMyHash;
    	// CMyModHash oMyHash;
    	
    	// 模拟初始化4台机器
    	oMyHash.AddIp("202.168.14.241"); 
    	oMyHash.AddIp("202.168.14.242"); 
    	oMyHash.AddIp("202.168.14.243"); 
    	oMyHash.AddIp("202.168.14.244"); 
    	oMyHash.EchoIpState();
    	
    	// 保存下各个Key路由的机器 
    	string sIp, arrKeyIp[HASH_MOD];
    	for (uint32_t key = 0; key < HASH_MOD; key ++)
    	{
    		oMyHash.FindIp(key, sIp);
    		arrKeyIp[key] = sIp;
    	}
    	
    	// 模拟加入1台机器
    	oMyHash.AddIp("202.168.14.245"); 
    	oMyHash.EchoIpState();
    	
    	// 推断多少Key相应数据须要移动机器
    	uint32_t uCnt = 0;
    	for (uint32_t key = 0; key < HASH_MOD; key ++)
    	{
    		oMyHash.FindIp(key, sIp);
    		if (arrKeyIp[key] != sIp) {
    			uCnt ++;
    		}
    	}
    	cout << "Key Sum=" << HASH_MOD << " , Need To Move:" << uCnt << endl;
    	
    	return 0;
    }


  • 相关阅读:
    linux命令整理
    各种提权姿势总结
    常用端口信息说明和利用
    近年来爆发的CVE漏洞编号
    一个优秀的SSH远程终端工具
    python-读写文件的方式
    kali安装ssh服务
    一套实用的渗透测试岗位面试题
    使用 python快速搭建http服务
    asciinema使用
  • 原文地址:https://www.cnblogs.com/wzzkaifa/p/7202808.html
Copyright © 2011-2022 走看看