打开APP
userphoto
未登录

开通VIP,畅享免费电子书等14项超值服

开通VIP
DotNet源码学习-HASHSET(初探)

命名空间:System.Collections.Generic

先看一下官方说明:类提供了高级的设置操作。集是不包含重复元素的集合,其元素无特定顺序 

HashSet <T>对象的容量是对象可以容纳的元素数。当向对象添加元素时,HashSet <T>对象的容量会自动增加。

HashSet<String> hashSet = new HashSet<string>();hashSet.Add("test");hashSet.Add("test");Console.WriteLine(hashSet.Count);

打印结果:1

HashSet的默认构造方法:

public HashSet()            : this((IEqualityComparer<T>?)null)        { }

注:: this语法为调用自身对象的其他构造方法。

public HashSet(IEqualityComparer<T>? comparer){     if (comparer == EqualityComparer<T>.Default)     {          comparer = null;     }     _comparer = comparer;     _lastIndex = 0;     _count = 0;     _freeList = -1;     _version = 0;}

第二中创建方式,将集合作为参数。

List<string> list = new List<string>();list.Add("test");list.Add("test");HashSet<string> hSet = new HashSet<string>(list);Console.WriteLine(hSet.Count);

此时控台输出:1

此时调用的构造方法为:

public HashSet(IEnumerable<T> collection, IEqualityComparer<T>? comparer)            : this(comparer){    if (collection == null)    {        throw new ArgumentNullException(nameof(collection));    }    var otherAsHashSet = collection as HashSet<T>;    if (otherAsHashSet != null && AreEqualityComparersEqual(this, otherAsHashSet))    {        CopyFrom(otherAsHashSet);    }    else    {        // to avoid excess resizes, first set size based on collection's count. Collection        // may contain duplicates, so call TrimExcess if resulting hashset is larger than        // threshold        ICollection<T>? coll = collection as ICollection<T>;        int suggestedCapacity = coll == null ? 0 : coll.Count;        Initialize(suggestedCapacity);        UnionWith(collection);        if (_count > 0 && _slots.Length / _count > ShrinkThreshold)        {            TrimExcess();        }    }}

在该构造方法中若存在重复值则通过查找大于或等于容量的下一个质数来使用建议的容量。

private int Initialize(int capacity){    Debug.Assert(_buckets == null, "Initialize was called but _buckets was non-null");    int size = HashHelpers.GetPrime(capacity);    _buckets = new int[size];    _slots = new Slot[size];    return size;}

下面为生成质数方法:

public static int GetPrime(int min){    if (min < 0)        throw new ArgumentException(SR.Arg_HTCapacityOverflow);    foreach (int prime in s_primes)    {        if (prime >= min)            return prime;    }    // Outside of our predefined table. Compute the hard way.    for (int i = (min | 1); i < int.MaxValue; i += 2)    {        if (IsPrime(i) && ((i - 1) % HashPrime != 0))            return i;    }    return min;}

再次扩展-》

public static bool IsPrime(int candidate){  if ((candidate & 1) != 0)  {    int limit = (int)Math.Sqrt(candidate);//取平方    for (int divisor = 3; divisor <= limit; divisor += 2)    {      if ((candidate % divisor) == 0)        return false;    }    return true;  }  return candidate == 2;}
internal struct Slot{  internal int hashCode;      // Lower 31 bits of hash code, -1 if unused  internal int next;          // Index of next entry, -1 if last  internal T value;}

存储LIst集合:

public void UnionWith(IEnumerable<T> other){  if (other == null)  {    throw new ArgumentNullException(nameof(other));  }  foreach (T item in other)  {    AddIfNotPresent(item);  }}

继续往下追踪:

private bool AddIfNotPresent(T value){  if (_buckets == null)  {    Initialize(0);  }  int hashCode;  int bucket;  int collisionCount = 0;  Slot[] slots = _slots;  IEqualityComparer<T>? comparer = _comparer;  if (comparer == null)  {  //取HASHCODE    hashCode = value == null ? 0 : InternalGetHashCode(value.GetHashCode());    bucket = hashCode % _buckets!.Length;    if (default(T)! != null) // TODO-NULLABLE: default(T) == null warning (https://github.com/dotnet/roslyn/issues/34757)    {      for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)      {        if (slots[i].hashCode == hashCode && EqualityComparer<T>.Default.Equals(slots[i].value, value))        {          return false;        }        if (collisionCount >= slots.Length)        {          // The chain of entries forms a loop, which means a concurrent update has happened.          throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);        }        collisionCount++;      }    }    else    {      // Object type: Shared Generic, EqualityComparer<TValue>.Default won't devirtualize      // https://github.com/dotnet/coreclr/issues/17273      // So cache in a local rather than get EqualityComparer per loop iteration      EqualityComparer<T> defaultComparer = EqualityComparer<T>.Default;      for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)      {        if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value, value))        {          return false;        }        if (collisionCount >= slots.Length)        {          // The chain of entries forms a loop, which means a concurrent update has happened.          throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);        }        collisionCount++;      }    }  }  else  {    hashCode = value == null ? 0 : InternalGetHashCode(comparer.GetHashCode(value));    bucket = hashCode % _buckets!.Length;    for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)    {      if (slots[i].hashCode == hashCode && comparer.Equals(slots[i].value, value))      {        return false;      }      if (collisionCount >= slots.Length)      {        // The chain of entries forms a loop, which means a concurrent update has happened.        throw new InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported);      }      collisionCount++;    }  }  int index;  if (_freeList >= 0)  {    index = _freeList;    _freeList = slots[index].next;  }  else  {    if (_lastIndex == slots.Length)    {      IncreaseCapacity();      // this will change during resize      slots = _slots;      bucket = hashCode % _buckets.Length;    }    index = _lastIndex;    _lastIndex++;  }  slots[index].hashCode = hashCode;  slots[index].value = value;  slots[index].next = _buckets[bucket] - 1;  _buckets[bucket] = index + 1;  _count++;  _version++;  return true;}private const int Lower31BitMask = 0x7FFFFFFF;

获取内部的HASHCODE

private static int InternalGetHashCode(T item, IEqualityComparer<T>? comparer){  if (item == null)  {    return 0;  }  int hashCode = comparer?.GetHashCode(item) ?? item.GetHashCode();  return hashCode & Lower31BitMask;}

划重点-》

slots[index].hashCode = hashCode;slots[index].value = value;slots[index].next = _buckets[bucket] - 1;

最终列表中的值存储到结构中。

使用对象初始化HASHSET时,如果相同

 

HashSet<string> hashSet = new HashSet<string>(); hashSet.Add("test"); hashSet.Add("test"); HashSet<string> hSet = new HashSet<string>(hashSet);
private void CopyFrom(HashSet<T> source){  int count = source._count;  if (count == 0)  {    // As well as short-circuiting on the rest of the work done,    // this avoids errors from trying to access otherAsHashSet._buckets    // or otherAsHashSet._slots when they aren't initialized.    return;  }  int capacity = source._buckets!.Length;  int threshold = HashHelpers.ExpandPrime(count + 1);  if (threshold >= capacity)  {    _buckets = (int[])source._buckets.Clone();    _slots = (Slot[])source._slots.Clone();    _lastIndex = source._lastIndex;    _freeList = source._freeList;  }  else  {    int lastIndex = source._lastIndex;    Slot[] slots = source._slots;    Initialize(count);    int index = 0;    for (int i = 0; i < lastIndex; ++i)    {      int hashCode = slots[i].hashCode;      if (hashCode >= 0)      {        AddValue(index, hashCode, slots[i].value);        ++index;      }    }    Debug.Assert(index == count);    _lastIndex = index;  }  _count = count;}
public static int ExpandPrime(int oldSize)//返回要增长到的哈希表大小{  int newSize = 2 * oldSize;  // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.  // Note that this check works even when _items.Length overflowed thanks to the (uint) cast  if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)  {    Debug.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength), "Invalid MaxPrimeArrayLength");    return MaxPrimeArrayLength;  }  return GetPrime(newSize);}

这里只贴出HashSet声明创建对象的两种方式。

下篇再研究具体实现〜

本站仅提供存储服务,所有内容均由用户发布,如发现有害或侵权内容,请点击举报
打开APP,阅读全文并永久保存 查看更多类似文章
猜你喜欢
类似文章
Java提高篇(二六)-----hashCode
HashMap的实现原理和底层数据结构
Java 如何重写对象的 equals 方法和 hashCode 方法
java指纹识别+谷歌图片识别技术=======源码实现===分享出来===
java常用集合类详解(有例子,集合类糊涂的来看!) .
Java中的HashCode 之Hashset造成的内存泄露
更多类似文章 >>
生活服务
热点新闻
分享 收藏 导长图 关注 下载文章
绑定账号成功
后续可登录账号畅享VIP特权!
如果VIP功能使用有故障,
可点击这里联系客服!

联系客服