微软实现的字典功能简单方便,出于全局性考虑,其内部实现比较复杂,在对海量数据处理时,仅仅存放一个对象地址就将占据32个bit(64位机器中占据64个bit),而且其内部通过int[]来处理hash桶,占用空间较大。
如果对于一些特定的海量数据的处理进行字典的自我定制,将极大地节省空间。
最近做的一个项目中字典中存放的数据达到几百万条,不同日期数值重复率极大,故实现了一个定制字典,实际结果和普通的Dictionay比较,压缩率达到一半以上。
该字典针对key为DateTime类型的数据进行了优化:同一个value的地址(value为引用类型)只保存一次(普通的字典中key不同value相同时,value都指向同一个引用类型的地址,即有多少个key则value的地址会保存多少份,同时还会维护一个hash桶),通过维护一个bitMap来标明该对象适用的日期,并做了一些相应的优化(如全局bitmap的实现)。同时,比较日期是否适用时全部采用位操作,因为代码在64位机器上运行,故bit操作时都以64bit作为操作对象。
这种字典数据重复率越高,压缩约明显,所以需要针对数据特征判断该实现是否适用。
理论上,只要key在一个有限可量化的集合里,都可以实现类似的字典。
key为dateTime的实现如下:
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Collections; using System.Data; namespace BitmapDictionary { class Program { /// <summary> ///Author: zhao-ewwj@163.com ///AuthorDate: 2015-01-22 14:12:10 +0800 ///Commit: zhao-ewwj@163.com ///CommitDate: 2015-01-22 14:12:10 +0800 ///根据酒店数据特征定制的字典,key为日期形式,key与value以bit位的形式对应,value以链表的形式组织 /// <typeparam name="TKey"></typeparam> /// <typeparam name="TValue"></typeparam> public class BitMapDictionary<TValue> : IDictionary<DateTime, TValue> { /// <summary> /// 判断字典中元素相等 /// </summary> /// <param name="t1"></param> /// <param name="t2"></param> /// <returns></returns> private static bool ObjEqual(TValue t1, TValue t2) { return t1 == null ? t2 == null : t1.Equals(t2); } public BitMapDictionary() : this(DateTime.Now.AddDays(-30), ObjEqual) { } public BitMapDictionary(int days) : this(DateTime.Now.AddDays(days), ObjEqual) { } public BitMapDictionary(BitMapDictionary<TValue> dictionary) : this(dictionary.relativeDateTime, dictionary.checkEqual) { this.GlobalBitmap = dictionary.GlobalBitmap; if (dictionary.Head == null) { this.Head = null; return; } this.Head = dictionary.Head.Clone() as Entry; var newCurrent = this.Head; var oldCurrent = dictionary.Head; while (oldCurrent.next != null) { newCurrent.next = oldCurrent.next.Clone() as Entry; newCurrent = newCurrent.next; oldCurrent = oldCurrent.next; } } /// <summary> /// 构造器 /// </summary> /// <param name="relativeDateTime">相对时间</param> /// <param name="checkEqual">比较对象相等</param> public BitMapDictionary(DateTime dateTime, Func<TValue, TValue, bool> checkEqual) { this.relativeDateTime = dateTime; this.checkEqual = checkEqual; } /// <summary> /// 构造器 /// </summary> /// <param name="relativeDateTime">相对时间</param> /// <param name="checkEqual">比较对象相等</param> public BitMapDictionary(Dictionary<DateTime, TValue> dictionary, DateTime dateTime, Func<TValue, TValue, bool> checkEqual) : this(dateTime,checkEqual) { foreach (var pair in dictionary) { this[pair.Key] = pair.Value; } } /// <summary> /// 相对时间 /// </summary> private DateTime relativeDateTime; /// <summary> /// 相等比较函数 /// </summary> private Func<TValue, TValue, bool> checkEqual; /// <summary> /// 链表头:第一版本,使用线性链表,遍历查询,待续.. /// </summary> public Entry Head; /// <summary> /// 链表元素 /// </summary> public class Entry { public TValue value; public BitMap bitMap; public Entry next; /// <summary> /// 复制 /// </summary> /// <returns>返回复制</returns> public object Clone() { return this.MemberwiseClone(); } } //预存256天的价格数据的bit位图 public struct BitMap { public ulong Days193To256; public ulong Days129To192; public ulong Day65To128; public ulong Day1To64; /// <summary> /// 将Bit置1 /// </summary> /// <param name="days"></param> public void SetBit(int day) { int index = day & 0x3F;//得到mod 64的结果 int unit = day >> 6;//得到除64的整数结果 ulong subCode = 1; subCode <<= index; switch (unit) { case 0: this.Days193To256 |= subCode; break; case 1: this.Days129To192 |= subCode; break; case 2: this.Day65To128 |= subCode; break; case 3: this.Day1To64 |= subCode; break; } } /// <summary> /// 将Bit置0 /// </summary> /// <param name="day"></param> public void ResetBit(int day) { int index = day & 0x3F;//得到mod 64的结果 int unit = day >> 6;//得到除64的整数结果 ulong subCode = 1; subCode <<= index; subCode = ulong.MaxValue ^ subCode;//异或取掩码 switch (unit) { case 0: this.Days193To256 &= subCode; break; case 1: this.Days129To192 &= subCode; break; case 2: this.Day65To128 &= subCode; break; case 3: this.Day1To64 &= subCode; break; } } /// <summary> /// 看Bit是否是1 /// </summary> /// <param name="day"></param> /// <returns></returns> public bool CheckBit(int day) { bool exists = false; int index = day & 0x3F;//得到mod 64的结果 int unit = day >> 6;//得到除64的整数结果 ulong subCode = 1; subCode <<= index; switch (unit) { case 0: exists = (this.Days193To256 & subCode) >= 1; break; case 1: exists = (this.Days129To192 & subCode) >= 1; break; case 2: exists = (this.Day65To128 & subCode) >= 1; break; case 3: exists = (this.Day1To64 & subCode) >= 1; break; } return exists; } public bool Equals(BitMap bitmap) { return (bitmap.Days193To256 == this.Days193To256 && bitmap.Days129To192 == this.Days129To192 && bitmap.Day65To128 == this.Day65To128 && bitmap.Day1To64 == this.Day1To64); } } /// <summary> /// 全局位图信息,提高性能 /// </summary> public BitMap GlobalBitmap; /// <summary> /// 索引器 /// </summary> /// <param name="key"></param> /// <returns></returns> public TValue this[DateTime key] { get { int relativeDay = this.RelativeDay(key); if (this.GlobalBitmap.CheckBit(relativeDay)) { var current = this.Head; while (current != null) { if (current.bitMap.CheckBit(relativeDay)) { return current.value; } current = current.next; } //如果代码执行到这里,说明全局bit位图有而实际没有,此时,纠正全局比特位图 this.GlobalBitmap.ResetBit(relativeDay); } throw new Exception(BitMapDicException.NotFountElement); } set { this.AddElement(this.RelativeDay(key), value); } } /// <summary> /// 向链表添加,判断是否重复 /// </summary> /// <param name="key"></param> /// <param name="value"></param> public void Add(DateTime key, TValue value) { int relativeDay = this.RelativeDay(key); if (this.GlobalBitmap.CheckBit(relativeDay)) { throw new Exception(BitMapDicException.AlreadyExists); } this.AddElement(relativeDay, value); } /// <summary> /// 校验全局位图是否有误 /// </summary> /// <returns></returns> public bool CheckGlobalBitmap() { BitMap bitmapChk = new BitMap(); var current = this.Head; int i = 0; while (current != null) { bitmapChk.Day1To64 |= current.bitMap.Day1To64; bitmapChk.Day65To128 |= current.bitMap.Day65To128; bitmapChk.Days129To192 |= current.bitMap.Days129To192; bitmapChk.Days193To256 |= current.bitMap.Days193To256; current = current.next; i++; } return this.GlobalBitmap.Equals(bitmapChk); } /// <summary> /// 无条件向链表添加,不判断是否重复(++先添加,在删除) /// </summary> /// <param name="relativeDay"></param> /// <param name="value"></param> public void AddElement(int relativeDay, TValue value) { if (this.Head == null) { this.Head = new Entry() { value = value }; this.Head.bitMap.SetBit(relativeDay); } else { Entry before = null; var current = this.Head; Entry bookUpCurrent = null;//添加时保留旧对象 Entry bookUpBefore = null;//添加时保留旧对象前一个对象 bool dayNotFound = true; bool objNotEqual = true; while (true) { if (objNotEqual) { //如果对象相等,则设置objNotEqual = false,看他的bitMap是否已设置,有,则设置dayNotFound =false并break;没有则设置它的bitMap并continue; if (this.checkEqual(current.value, value)) { objNotEqual = false; if (current.bitMap.CheckBit(relativeDay)) { break; } else { current.bitMap.SetBit(relativeDay); if (current.next == null) { break; } else { before = current; current = current.next; } } } } if (dayNotFound) { //如果日期发现已设,则看是否已有相等对象,如果有,则直接reset bitmap并break;如果没有,则继续进行bit clear操作; if (current.bitMap.CheckBit(relativeDay)) { dayNotFound = false; if (false == objNotEqual) { //先不做bit操作,保留现场 bookUpCurrent = current; bookUpBefore = before; break; } else { //先不做bit操作,保留现场 bookUpCurrent = current; bookUpBefore = before; } } } if (current.next == null) { break; } else { before = current; current = current.next; } } if (objNotEqual) { //如果一直没有找到相等元素,则直接在链表最后追加一个,且设置一下bitMap current.next = new Entry() { value = value }; current.next.bitMap.SetBit(relativeDay); } //对保留的现场进行处理 if (bookUpCurrent != null) { bookUpCurrent.bitMap.ResetBit(relativeDay); DeleteElementFromChain(bookUpCurrent, bookUpBefore); } } //添加成功,全局位设置 this.GlobalBitmap.SetBit(relativeDay); } /// <summary> /// 看Key是否存在 /// </summary> /// <param name="key"></param> /// <returns></returns> public bool ContainsKey(DateTime key) { int relativeDay = this.RelativeDay(key); //直接看全局bitMap,有则直接返回 if (this.GlobalBitmap.CheckBit(relativeDay)) { return true; } return false; } /// <summary> /// 去掉某天数据 /// </summary> /// <param name="key"></param> /// <returns></returns> public bool Remove(DateTime key) { int relativeDay = this.RelativeDay(key); if (this.GlobalBitmap.CheckBit(relativeDay)) { var current = this.Head; Entry before = null; while (current != null) { if (current.bitMap.CheckBit(relativeDay)) { current.bitMap.ResetBit(relativeDay); DeleteElementFromChain(current, before); break; } else { before = current; current = current.next; } } //纠正全局比特位图 this.GlobalBitmap.ResetBit(relativeDay); } return false; } /// <summary> /// 获取某天数据 /// </summary> /// <param name="key"></param> /// <param name="value"></param> /// <returns></returns> public bool TryGetValue(DateTime key, out TValue value) { value = default(TValue); int relativeDay = this.RelativeDay(key); if (this.GlobalBitmap.CheckBit(relativeDay)) { var current = this.Head; while (current != null) { if (current.bitMap.CheckBit(relativeDay)) { value = current.value; return true; } else { current = current.next; } } //如果代码执行到这里,说明全局bit位图有而实际没有,此时,纠正全局比特位图 this.GlobalBitmap.ResetBit(relativeDay); } return false; } /// <summary> /// 统计key value数量 /// </summary> public int Count { get { int num; int num2 = 0; int bitA = GetBitNum(this.GlobalBitmap.Days193To256); int bitB = GetBitNum(this.GlobalBitmap.Days129To192); int bitC = GetBitNum(this.GlobalBitmap.Day65To128); int bitD = GetBitNum(this.GlobalBitmap.Day1To64); num = bitA + bitB + bitC + bitD; var current = this.Head; while (current != null) { num2++; current = current.next; } if (num != num2) { //写报警 } return num; } } /// <summary> /// 非只读 /// </summary> public bool IsReadOnly { get { return false; } } /// <summary> /// 清空链表 /// </summary> public void Clear() { this.GlobalBitmap = new BitMap(); this.Head = null; } #region 功能函数 /// <summary> /// 去掉某个失效元素 /// </summary> /// <param name="current"></param> /// <param name="before"></param> public void DeleteElementFromChain(Entry current, Entry before) { //如果去掉当日生效后当前实体对任何一天都不生效,则remove实体 if (current.bitMap.Days193To256 == 0 && current.bitMap.Days129To192 == 0 && current.bitMap.Day65To128 == 0 && current.bitMap.Day1To64 == 0) { if (before == null) { this.Head = current.next; } else { before.next = current.next; } } } /// <summary> /// 获取时间差,并按位取反,如设定相对时间是2015-1-5,则2015-1-5号则记为第1天,2015-1-6号则记为第2天 /// </summary> /// <param name="dateTime"></param> /// <returns></returns> public int RelativeDay(DateTime dateTime) { int relativedays = -1; if (dateTime >= this.relativeDateTime) { relativedays = (dateTime - this.relativeDateTime).Days; } if (relativedays >= 256) { throw new Exception(BitMapDicException.DateOver); } else if (relativedays == -1) { throw new Exception(BitMapDicException.DateLow); } return 255 - relativedays; } /// <summary> /// 取得bit为1的个数 /// </summary> /// <param name="dayBit"></param> /// <returns></returns> private int GetBitNum(ulong dayBit) { int count = 0; ulong subCode = 1; for (int i = 0; i < 64; i++) { if ((dayBit & subCode) >= 1) { count++; } subCode <<= 1; } return count; } #endregion #region 未实现的一些方法 public void Add(KeyValuePair<DateTime, TValue> item) { throw new Exception(BitMapDicException.NotImplement); } public bool Contains(KeyValuePair<DateTime, TValue> item) { throw new Exception(BitMapDicException.NotImplement); } public ICollection<DateTime> Keys { get { //Keys接口暂不实现,如果日后需要实现,可通过Global比特位图的方式实现ICollection接口,不要 throw new Exception(BitMapDicException.KeysNotImplement); } } public ICollection<TValue> Values { get { //values接口暂不实现,如果日后需要实现,可通过Head头的方式实现ICollection接口 throw new Exception(BitMapDicException.ValuesNotImplement); } } public bool Remove(KeyValuePair<DateTime, TValue> item) { throw new Exception(BitMapDicException.AlreadyExists); } public void CopyTo(KeyValuePair<DateTime, TValue>[] array, int arrayIndex) { throw new Exception(BitMapDicException.AlreadyExists); } IEnumerator IEnumerable.GetEnumerator() { throw new Exception(BitMapDicException.AlreadyExists); } IEnumerator<KeyValuePair<DateTime, TValue>> IEnumerable<KeyValuePair<DateTime, TValue>>.GetEnumerator() { throw new Exception(BitMapDicException.AlreadyExists); } #endregion public class BitMapDicException { public static readonly String ChkTkey = "Tkey必须是时间类型"; public static readonly String ChkEqualFunc ="必须设定相等比较方法"; public static readonly String KeysNotImplement = "Keys接口不实现"; public static readonly String ValuesNotImplement ="Values接口不实现"; public static readonly String NotFountElement = "索引器未发现指定元素"; public static readonly String DateOver = "data OutOfRange最大只能存放256天数据"; public static readonly String DateLow = "data OutOfRange不能存放相对日期之前的数据"; public static readonly String AlreadyExists ="元素已存在"; public static readonly String NotImplement = "该成员未实现"; public static readonly String ValueCannotBeNull = "如果要去掉某个元素,请用Remove功能!赋值null表示增加一个value为null的元素,对定制的字典而言无意义!"; public static readonly String NotExistsEqualFunction ="不存在对象相等比较函数!"; } } /// <summary> /// Main中包含了一些基本的单元测试 /// </summary> /// <param name="args"></param> static void Main(string[] args) { RoomInfo r1 = new RoomInfo(); RoomInfo r2 = new RoomInfo(); Console.WriteLine("== 检查:" + (r1 == r2)); Console.WriteLine("Equals检查:" + r1.Equals(r2)); //添加300天 BitMapDictionary<RoomInfo> dic = new BitMapDictionary<RoomInfo>(-10); for (int i = 0; i <= 255; i++) { dic.Add(DateTime.Now.AddDays(i - 10), new RoomInfo() { FGPrice = (i - 10) * 0.1m, PPPrice = (i - 10) * 0.1m + 0.1m }); } //全局位是否正确 bool globalCheck = true; globalCheck = globalCheck && dic.CheckGlobalBitmap(); Console.WriteLine("全局检查:" + globalCheck); BitMapDictionary<RoomInfo> dic2 = new BitMapDictionary<RoomInfo>(dic); dic = dic2; //看看取出来和刚才放进去是否一致; bool checkElement = true; for (int i = 0; i <= 255; i++) { RoomInfo roomTemp; dic.TryGetValue(DateTime.Now.AddDays(i - 10), out roomTemp); checkElement = checkElement && RoomInfo.Equals(roomTemp, new RoomInfo() { FGPrice = (i - 10) * 0.1m, PPPrice = (i - 10) * 0.1m + 0.1m }); } Console.WriteLine("对象逐步比较:" + checkElement); globalCheck = globalCheck && dic.CheckGlobalBitmap(); Console.WriteLine("全局检查:" + globalCheck); ///各自移除第一天,看看bit位对不对 for (int i = 0; i < 4; i++) { dic.Remove(DateTime.Now.AddDays(i * 64 - 10)); } Console.WriteLine("设置了256天,各自减去最高位,检查:" + (dic.GlobalBitmap.Equals(new BitMapDictionary<RoomInfo>.BitMap() { Day1To64 = 0x7fffffffffffffff, Day65To128 = 0x7fffffffffffffff, Days129To192 = 0x7fffffffffffffff, Days193To256 = 0x7fffffffffffffff }))); globalCheck = globalCheck && dic.CheckGlobalBitmap(); Console.WriteLine("全局检查:" + globalCheck); for (int i = 0; i <= 255; i++) { RoomInfo roomTemp; dic.Remove(DateTime.Now.AddDays(i - 10)); } Console.WriteLine("移除所有数据,字典数量为0:" + (dic.Count == 0)); //检查字典global globalCheck = globalCheck && dic.CheckGlobalBitmap(); Console.WriteLine("全局检查:" + globalCheck); ///将增加逻辑改成索引器 for (int i = 0; i <= 255; i++) { dic[DateTime.Now.AddDays(i - 10)] = new RoomInfo() { FGPrice = (i - 10) * 0.1m, PPPrice = (i - 10) * 0.1m + 0.1m }; } globalCheck = globalCheck && dic.CheckGlobalBitmap(); Console.WriteLine("改成索引器赋值后,全局检查:" + globalCheck); ///添加256天,看看是不是满位 Console.WriteLine("设置了256天,是否满位:" + (dic.GlobalBitmap.Equals(new BitMapDictionary<RoomInfo>.BitMap() { Day1To64 = 0xffffffffffffffff, Day65To128 = 0xffffffffffffffff, Days129To192 = 0xffffffffffffffff, Days193To256 = 0xffffffffffffffff }))); globalCheck = globalCheck && dic.CheckGlobalBitmap(); Console.WriteLine("全局检查:" + globalCheck); //将每个bit组中的第一个作为共享对象,其它的和他一样,看看是不是共享了 for (int i = 0; i <= 255; i++) { int unit = (int)(i / 64);//得到除64的整数结果 dic[DateTime.Now.AddDays(i - 10)] = new RoomInfo() { FGPrice = (unit - 10) * 0.1m, PPPrice = (unit - 10) * 0.1m + 0.1m }; } bool global = true; if (!dic.Head.bitMap.Equals(new BitMapDictionary<RoomInfo>.BitMap() { Day1To64 = 0xffffffffffffffff, Day65To128 = 0x0, Days129To192 = 0x0, Days193To256 = 0x0 }) || !dic.Head.next.bitMap.Equals(new BitMapDictionary<RoomInfo>.BitMap() { Day1To64 = 0x0, Day65To128 = 0xffffffffffffffff, Days129To192 = 0x0, Days193To256 = 0x0 }) || !dic.Head.next.next.bitMap.Equals(new BitMapDictionary<RoomInfo>.BitMap() { Day1To64 = 0x0, Day65To128 = 0x0, Days129To192 = 0xffffffffffffffff, Days193To256 = 0x0 }) || !dic.Head.next.next.next.bitMap.Equals(new BitMapDictionary<RoomInfo>.BitMap() { Day1To64 = 0x0, Day65To128 = 0x0, Days129To192 = 0x0, Days193To256 = 0xffffffffffffffff }) || !dic.GlobalBitmap.Equals(new BitMapDictionary<RoomInfo>.BitMap() { Day1To64 = 0xffffffffffffffff, Day65To128 = 0xffffffffffffffff, Days129To192 = 0xffffffffffffffff, Days193To256 = 0xffffffffffffffff }) ) { global = false; } Console.WriteLine("对象共享检查:" + global); ///在上面的基础上,将2-10改成一天数据,看看是否共享了对象 globalCheck = globalCheck && dic.CheckGlobalBitmap(); Console.WriteLine("全局检查:" + globalCheck); for (int i = 0; i <= 255; i++) { dic[DateTime.Now.AddDays(i - 10)] = null; } Console.WriteLine("测试结束"); Console.ReadKey(); } } public class RoomInfo { public decimal PPPrice; public decimal FGPrice; public override bool Equals(Object r1) { RoomInfo r = (RoomInfo)r1; return r != null && r.PPPrice == this.PPPrice && r.FGPrice == this.FGPrice; } public override int GetHashCode() { return this.FGPrice.GetHashCode() ^ this.PPPrice.GetHashCode(); } } }
System.Configuration.ConfigurationManager.AppSettings.Get("QueryInfoBusinType")