zoukankan      html  css  js  c++  java
  • C#源码(十二) HashSet

    基础介绍

    仓储地址

    https://github.com/dotnet/runtime/

    我本地的项目位置

    C:\project\SourceCode\runtime-5.0.0-preview.3.20214.6\src\libraries\System.Collections

    实现原理和Dictionary差不多,都是链地址法解决冲突。

    Dictionary 有Key Value

    HashSet只有Value

    实际容器为Slot[] m_slots;

    /// <summary>
    /// One entry of the set's backing Slot[] array: the stored element together with
    /// its cached hash code and the index of the next entry.
    /// </summary>
    internal struct Slot 
    {
      internal int hashCode;      // Lower 31 bits of hash code, -1 if unused
      internal int next;          // Index of next entry, -1 if last
      internal T value;           // The stored element
    }

    HashSet操作元素的时间复杂度接近O(1)

    定义int[] m_buckets 数组来保存元素在实际容器Slot[] m_slots 位置

    即 Value 保存在以 m_slots[m_buckets[hashCode % m_buckets.Length] - 1] 为链头的链表中(hashCode 为取低31位后的哈希值;m_buckets 中存放的是"索引+1",0 表示空桶)

    容器长度为质数

    质数只能被1和自身整除

    减少位置冲突

    数据已满时再添加数据会自动扩容,新容量为不小于当前容量2倍的质数

    新建一个2倍大小的容器

    数据拷贝过去 重新计算位置

    使用优化点

    已知容器大小的情况 直接初始化对应大小

    自定义元素可以实现IEqualityComparer<T>,以便更高效地判断相等和获取HashCode

    哈希函数

    当位置冲突时使用Slot.next保存数据,也就是拉链法解决冲突。

    hashCode = value == null ? 0 : InternalGetHashCode(comparer.GetHashCode(value));

    这里comparer就是IEqualityComparer<T>? comparer = _comparer;可以是默认的,也可以构造函数传入

    InternalGetHashCode方法如下

    /// <summary>
    /// Computes the hash code stored for <paramref name="item"/>.
    /// </summary>
    /// <param name="item">Element to hash; a null element hashes to 0.</param>
    /// <param name="comparer">Comparer supplying the hash code, or null to use the element's own GetHashCode.</param>
    /// <returns>The raw hash code ANDed with Lower31BitMask.</returns>
    private static int InternalGetHashCode(T item, IEqualityComparer<T>? comparer)
    {
        if (item == null)
        {
            return 0;
        }

        // Use the supplied comparer when there is one, otherwise the element itself.
        int rawHash = comparer != null
            ? comparer.GetHashCode(item)
            : item.GetHashCode();

        return rawHash & Lower31BitMask;
    }

    最后通过hashCode对桶长度求余获取bucket

    bucket = hashCode % _buckets!.Length;

    内部AddIfNotPresent方法

    /// <summary>
    /// Adds value to MyHashSet if not contained already
    /// Returns true if added and false if already present
    /// </summary>
    /// <param name="value">value to find (and to add when it is not present)</param>
    /// <returns>true if the value was added; false if an equal value was already present</returns>
    private bool AddIfNotPresent(T value)
    {
        // Buckets are created on first use.
        if (_buckets == null)
        {
            Initialize(0);
        }
    
        int hashCode = InternalGetHashCode(value);
        int bucket = hashCode % _buckets.Length;
        int collisionCount = 0;
        // Keep a local reference to the current slots array
        Slot[] slots = _slots;
    
        // Walk the chain of this bucket. _buckets[bucket] - 1 is the first slot to inspect;
        // it is -1 when the bucket is empty, so the loop body is skipped. Each step follows slots[i].next.
        for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)
        {
            // An equal element is already stored
            if (slots[i].hashCode == hashCode && _comparer.Equals(slots[i].value, value))
            {
                return false;
            }
    
            // More chain steps have been taken than there are slots
            if (collisionCount >= slots.Length)
            {
                // The chain of entries forms a loop, which means a concurrent update has happened.
                throw new InvalidOperationException( );
            }
            collisionCount++;
        }
    
        int index;
    
        // Pick a free slot: reuse the head of the free list if there is one, otherwise append at _lastIndex
        if (_freeList >= 0)
        {
            index = _freeList;
            _freeList = slots[index].next;
        }
        else
        {
            if (_lastIndex == slots.Length)
            {
                IncreaseCapacity();
                // this will change during resize
                slots = _slots;
                bucket = hashCode % _buckets.Length;
            }
            index = _lastIndex;
            _lastIndex++;
        }
        // Store the element and link the slot in as the new head of the bucket's chain
        // (buckets hold index + 1, hence the -1 / +1 adjustments)
        slots[index].hashCode = hashCode;
        slots[index].value = value;
        slots[index].next = _buckets[bucket] - 1;
        _buckets[bucket] = index + 1;
        _count++;
        _version++;
    
        return true;
    }

    HashHelpers辅助类

    这里HashHelpers是用来求素数和获取下一次扩容的大小的辅助类,里面有一个数组存放基础素数,如果容量超过已有素数,会通过数学的方法计算出需要的素数。

    /// <summary>
    /// Helper routines for sizing hash tables: primality testing, lookup of a suitable
    /// prime capacity, and computation of the capacity to grow to.
    /// </summary>
    public class HashHelpers
    {
        public const uint HashCollisionThreshold = 100;

        // This is the maximum prime smaller than Array.MaxArrayLength
        public const int MaxPrimeArrayLength = 0x7FEFFFFD;
        public const int HashPrime = 101;

        // Precomputed primes in ascending order; GetPrime returns the first one that is large enough.
        private static readonly int[] s_primes =
        {
            3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
            1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
            17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
            187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
            1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369
        };

        /// <summary>
        /// Determines whether <paramref name="candidate"/> is a prime number.
        /// </summary>
        /// <param name="candidate">The number to test.</param>
        /// <returns>true if the number is prime; false otherwise (including 0, 1 and negative values).</returns>
        public static bool IsPrime(int candidate)
        {
            // Numbers below 2 are never prime. Without this guard 1 would be reported as prime
            // and negative odd values would take the square root of a negative number.
            if (candidate < 2)
            {
                return false;
            }

            // Even numbers: only 2 is prime.
            if ((candidate & 1) == 0)
            {
                return candidate == 2;
            }

            // Trial division by odd divisors up to the square root; a factor larger than the
            // square root always pairs with one below it, so nothing beyond it needs checking.
            int limit = (int)Math.Sqrt(candidate);
            for (int divisor = 3; divisor <= limit; divisor += 2)
            {
                if ((candidate % divisor) == 0)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Returns a prime that is greater than or equal to <paramref name="min"/>.
        /// </summary>
        /// <param name="min">The minimum acceptable value.</param>
        /// <returns>
        /// The first table prime that is at least <paramref name="min"/>, or, beyond the table,
        /// the first odd prime p at or above it for which (p - 1) is not a multiple of HashPrime.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="min"/> is negative.</exception>
        public static int GetPrime(int min)
        {
            if (min < 0)
            {
                throw new ArgumentException("Capacity overflowed and went negative.", nameof(min));
            }

            // Look in the predefined table first.
            foreach (int prime in s_primes)
            {
                if (prime >= min)
                {
                    return prime;
                }
            }

            // Outside of our predefined table. Compute the hard way.
            // OR-ing with 1 makes the start value odd (even numbers other than 2 cannot be prime);
            // stepping by 2 then visits odd numbers only.
            for (int i = (min | 1); i < int.MaxValue; i += 2)
            {
                if (IsPrime(i) && ((i - 1) % HashPrime != 0))
                {
                    return i;
                }
            }

            return min;
        }

        /// <summary>
        /// Returns size of hashtable to grow to.
        /// </summary>
        /// <param name="oldSize">The current size.</param>
        /// <returns>
        /// A prime at least twice <paramref name="oldSize"/>, capped at MaxPrimeArrayLength
        /// when doubling would exceed it.
        /// </returns>
        public static int ExpandPrime(int oldSize)
        {
            int newSize = 2 * oldSize;

            // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
            // Note that this check works even when _items.Length overflowed thanks to the (uint) cast
            if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
            {
                return MaxPrimeArrayLength;
            }

            return GetPrime(newSize);
        }
    }

    对外公有方法

    /// <summary>
    /// Modifies this set so that it also contains every element of <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The sequence whose elements are added to this set.</param>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void UnionWith(IEnumerable<T> other)
    {
        if (other == null)
        {
            throw new ArgumentNullException(nameof(other));
        }

        // Drive the enumerator by hand; elements already present are skipped by AddIfNotPresent.
        using (IEnumerator<T> cursor = other.GetEnumerator())
        {
            while (cursor.MoveNext())
            {
                AddIfNotPresent(cursor.Current);
            }
        }
    }
    
    /// <summary>
    /// Removes from this set every element that also appears in <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The sequence of elements to remove.</param>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void ExceptWith(IEnumerable<T> other)
    {
        if (other == null)
        {
            throw new ArgumentNullException("other");
        }

        // An empty set has nothing to remove, so only a non-empty set needs work.
        if (m_count != 0)
        {
            if (other == this)
            {
                // A set minus itself is the empty set.
                Clear();
            }
            else
            {
                // Remove each element of other from this set.
                foreach (T item in other)
                {
                    Remove(item);
                }
            }
        }
    }
    
    /// <summary>
    /// Modifies this set so that it holds the symmetric difference with <paramref name="other"/>:
    /// elements found in exactly one of the two collections.
    /// </summary>
    /// <param name="other">The collection to compare against.</param>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void SymmetricExceptWith(IEnumerable<T> other)
    {
        if (other == null)
        {
            throw new ArgumentNullException("other");
        }

        if (m_count == 0)
        {
            // The symmetric difference of the empty set with other is other itself.
            UnionWith(other);
        }
        else if (other == this)
        {
            // The symmetric difference of a set with itself is the empty set.
            Clear();
        }
        else if (other is MyHashSet<T> sameKindSet && AreEqualityComparersEqual(this, sameKindSet))
        {
            // A hash set using the same equality comparer has unique elements by our own
            // definition of equality, so the faster path without bit array allocations applies.
            SymmetricExceptWithUniqueHashSet(sameKindSet);
        }
        else
        {
            // Uniqueness cannot be assumed (different comparer or arbitrary sequence).
            SymmetricExceptWithEnumerable(other);
        }
    }
    对外公有方法

    交集和子集等判断

    /// <summary>
    /// Modifies this set so that it keeps only the elements that also appear in <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The collection to intersect with.</param>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void IntersectWith(IEnumerable<T> other)
    {
        if (other == null)
        {
            throw new ArgumentNullException(nameof(other));
        }

        // Intersecting the empty set with anything leaves it empty.
        if (m_count == 0)
        {
            return;
        }

        ICollection<T> otherAsCollection = other as ICollection<T>;
        if (otherAsCollection != null)
        {
            // Intersecting with an empty collection empties this set.
            if (otherAsCollection.Count == 0)
            {
                Clear();
                return;
            }

            MyHashSet<T> otherAsSet = other as MyHashSet<T>;
            // faster if other is a hashset using same equality comparer; so check
            // that other is a hashset using the same equality comparer.
            if (otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet))
            {
                // Call the helper under the name it is declared with in this class.
                IntersectWithMyHashSetWithSameEC(otherAsSet);
                return;
            }
        }

        // General case: any enumerable, or a set with a different comparer.
        IntersectWithEnumerable(other);
    }
    
    /// <summary>
    /// Removes from this set every element that <paramref name="other"/> does not contain.
    /// </summary>
    /// <param name="other">The set whose membership decides which elements are kept.</param>
    private void IntersectWithMyHashSetWithSameEC(MyHashSet<T> other)
    {
        for (int slotIndex = 0; slotIndex < _lastIndex; slotIndex++)
        {
            // Slots with a negative hash code hold no element.
            if (_slots[slotIndex].hashCode < 0)
            {
                continue;
            }

            T candidate = _slots[slotIndex].value;
            if (other.Contains(candidate))
            {
                continue;
            }

            Remove(candidate);
        }
    }
    
    /// <summary>
    /// Iterate over other. If contained in this, mark an element in bit array corresponding to
    /// its position in _slots. If anything is unmarked (in bit array), remove it.
    /// This attempts to allocate on the stack, if below StackAllocThreshold.
    /// The stack allocation goes through an int pointer, which is why the method is marked unsafe.
    /// </summary>
    /// <param name="other">The sequence whose elements are kept in this set.</param>
    private unsafe void IntersectWithEnumerable(IEnumerable<T> other)
    {
        // keep track of current last index; don't want to move past the end of our bit array
        // (could happen if another thread is modifying the collection)
        int originalLastIndex = _lastIndex;
        int intArrayLength = BitHelper.ToIntArrayLength(originalLastIndex);
    
        BitHelper bitHelper;
        // Choose the backing storage of the BitHelper by length: stackalloc when small enough,
        // a heap-allocated int[] otherwise
        if (intArrayLength <= StackAllocThreshold)
        {
            int* bitArrayPtr = stackalloc int[intArrayLength];
            bitHelper = new BitHelper(bitArrayPtr, intArrayLength);
        }
        else
        {
            int[] bitArray = new int[intArrayLength];
            bitHelper = new BitHelper(bitArray, intArrayLength);
        }
    
        // mark if contains: find index of in slots array and mark corresponding element in bit array
        foreach (T item in other)
        {
            int index = InternalIndexOf(item);
            if (index >= 0)
            {
                bitHelper.MarkBit(index);
            }
        }
    
        // if anything unmarked, remove it. Perf can be optimized here if BitHelper had a 
        // FindFirstUnmarked method.
        for (int i = 0; i < originalLastIndex; i++)
        {
            if (_slots[i].hashCode >= 0 && !bitHelper.IsMarked(i))
            {
                Remove(_slots[i].value);
            }
        }
    }
    求和other交集
    /// <summary>
    /// Determines whether this set is a subset of <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The collection to compare against.</param>
    /// <returns>true if this set is a subset of <paramref name="other"/>; otherwise false.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
    public bool IsSubsetOf(IEnumerable<T> other)
    {
        if (other == null)
        {
            throw new ArgumentNullException("other");
        }

        // The empty set is a subset of any set.
        if (m_count == 0)
        {
            return true;
        }

        MyHashSet<T> otherAsSet = other as MyHashSet<T>;
        bool isSetWithSameComparer = otherAsSet != null && AreEqualityComparersEqual(this, otherAsSet);

        if (!isSetWithSameComparer)
        {
            // General path: count how many of our elements were seen in other.
            ElementCount counts = CheckUniqueAndUnfoundElements(other, false);
            return counts.uniqueCount == m_count && counts.unfoundCount >= 0;
        }

        // Same equality comparer: a set with more elements than other cannot be a subset of it;
        // otherwise check that each element of this set is contained in other.
        return m_count <= otherAsSet.Count && IsSubsetOfHashSetWithSameEC(otherAsSet);
    }
    是否为other的子集
  • 相关阅读:
    深度优先和广度优先
    水管工游戏(深度优先)
    炸弹人
    广度优先(迷宫找人)
    System.Data.Entity.Core.MetadataException: 无法加载指定的无数据资源
    Element Cascader 级联选择器 单选操作优化
    Windows服务 ProjectInstaller 获取 路径
    Quartz.NET ScheduledFireTimeUtc 当超过1分钟时出现的问题。
    记录:一个SQL SERVER奇怪的问题。
    log4.net 配置
  • 原文地址:https://www.cnblogs.com/qixinbo/p/13360411.html
Copyright © 2011-2022 走看看