1. HashMap's data structure:
Array, linked list, and red-black tree
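For orientation, each slot of the array (a "bin") holds a chain of entry nodes. The sketch below is a simplified illustration of such a node, not the JDK source (which additionally implements Map.Entry):

class Node<K, V> {
    final int hash;    // spread hash of the key, cached for lookups and resize
    final K key;
    V value;
    Node<K, V> next;   // next node in the same bin (separate chaining)

    Node(int hash, K key, V value, Node<K, V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }
}

When a bin's chain grows long enough (see the thresholds in the next section) it is converted into a red-black tree of TreeNode entries, and converted back to a chain when it shrinks again.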
2. HashMap parameters:
Load factor (the time/space trade-off): DEFAULT_LOAD_FACTOR = 0.75
Treeify threshold (worst-case safeguard): TREEIFY_THRESHOLD = 8
Minimum table capacity for treeification: MIN_TREEIFY_CAPACITY = 64
Why treeify: it is a worst-case safeguard that keeps performance acceptable even when hash collisions are severe, turning an O(n) scan of a long bin into an O(log n) tree lookup.
capacity * loadFactor (the threshold) is the number of entries a HashMap can actually hold; once the number of entries exceeds this value, the table is resized.
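To make those constants concrete, here is a small self-contained sketch (my own illustration; the BadKey class is hypothetical, not JDK code) that forces every key into the same bin by giving all keys the same hashCode. Because the table capacity is at least MIN_TREEIFY_CAPACITY (64), inserting the ninth colliding key pushes the chain past TREEIFY_THRESHOLD and the bin is converted into a red-black tree; with a smaller table, treeifyBin would resize instead of treeifying.

import java.util.HashMap;

public class TreeifyDemo {
    // Hypothetical key type: constant hashCode sends every instance to the same bin.
    static final class BadKey implements Comparable<BadKey> {
        final int id;
        BadKey(int id) { this.id = id; }
        @Override public int hashCode() { return 42; }
        @Override public boolean equals(Object o) {
            return o instanceof BadKey && ((BadKey) o).id == id;
        }
        @Override public int compareTo(BadKey o) { return Integer.compare(id, o.id); }
    }

    public static void main(String[] args) {
        HashMap<BadKey, Integer> map = new HashMap<>(64); // capacity >= MIN_TREEIFY_CAPACITY
        for (int i = 0; i < 9; i++)        // 9 colliding keys: chain length exceeds TREEIFY_THRESHOLD
            map.put(new BadKey(i), i);     // on the 9th put the bin becomes a red-black tree
        System.out.println(map.size());    // 9; lookups in that bin are now O(log n), not O(n)
    }
}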
3. Hash computation
The capacity of a HashMap is always a power of two, and the bucket index is computed by masking: (n - 1) & hash, which is equivalent to hash % n when n is a power of two. Used directly, this would mean the high-order bits of the hashCode never take part in index selection. HashMap assumes that hashCode values are already reasonably well distributed, and it already relies on treeification to cope with pathological cases, so it simply XORs the high 16 bits into the low 16 bits and uses the result as the effective hash.
/**
* Computes key.hashCode() and spreads (XORs) higher bits of hash
* to lower. Because the table uses power-of-two masking, sets of
* hashes that vary only in bits above the current mask will
* always collide. (Among known examples are sets of Float keys
* holding consecutive whole numbers in small tables.) So we
* apply a transform that spreads the impact of higher bits
* downward. There is a tradeoff between speed, utility, and
* quality of bit-spreading. Because many common sets of hashes
* are already reasonably distributed (so don't benefit from
* spreading), and because we use trees to handle large sets of
* collisions in bins, we just XOR some shifted bits in the
* cheapest possible way to reduce systematic lossage, as well as
* to incorporate impact of the highest bits that would otherwise
* never be used in index calculations because of table bounds.
*/
static final int hash(Object key) {
int h;
return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}
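A quick stand-alone illustration (my own, not JDK code) of why this spreading matters: two hash codes that differ only in bits above the 4-bit mask of a 16-slot table collide when masked directly, but land in different buckets once the high half is XORed down.

public class SpreadDemo {
    // Same transform HashMap.hash() applies to key.hashCode().
    static int spread(int h) { return h ^ (h >>> 16); }

    public static void main(String[] args) {
        int n = 16;                        // table size, so the mask is n - 1 = 0b1111
        int h1 = 0x10000, h2 = 0x20000;    // differ only in bits above the mask
        System.out.println((h1 & (n - 1)) + " " + (h2 & (n - 1)));                   // 0 0 -> same bucket
        System.out.println((spread(h1) & (n - 1)) + " " + (spread(h2) & (n - 1)));   // 1 2 -> different buckets
    }
}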
4. Analysis of the put flow
putVal either inserts a new node into the bin the hash maps to or updates an existing one. An insert may require growing the table; an update has to honor the onlyIfAbsent parameter.
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
// Lazy initialization: the table is also created through resize()
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
// A put is either an insert or an update; an insert may trigger a resize, an update must honor onlyIfAbsent
// 1. The bin is empty: insert a new node directly
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
else {
Node<K,V> e; K k;
// 2. The first node in the bin already holds the key we want: this is an update
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
// 3. Insert or locate the key in the tree or the linked list; an update does not overwrite the old value here (that happens in step 4)
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
else {
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
p.next = newNode(hash, key, value, null);
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
p = e;
}
}
// 4. For an update, decide whether the old value may be replaced, then return immediately
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
// 5. An update never causes a ConcurrentModificationException, since it returns before modCount changes; an insert may also require a resize
++modCount;
if (++size > threshold)
resize();
afterNodeInsertion(evict);
return null;
}
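In the JDK, put() calls putVal with onlyIfAbsent = false while putIfAbsent() passes true, so the difference is easy to observe from the outside. A small usage sketch, with the printed values noted in the comments:

import java.util.HashMap;

public class OnlyIfAbsentDemo {
    public static void main(String[] args) {
        HashMap<String, Integer> m = new HashMap<>();
        m.put("a", 1);                               // insert (returns null)
        System.out.println(m.put("a", 2));           // 1: onlyIfAbsent = false, old value overwritten
        System.out.println(m.putIfAbsent("a", 3));   // 2: onlyIfAbsent = true, existing value kept
        m.put("b", null);
        System.out.println(m.putIfAbsent("b", 9));   // null: a null old value is still replaced
        System.out.println(m.get("b"));              // 9
    }
}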
5. The resize flow
resize() either initializes the table or doubles its size; in both cases a new capacity and a new threshold have to be computed. When growing, capacity and threshold are both doubled; when initializing, the threshold is computed as capacity * load factor. Growing also has to migrate the existing entries, which initialization does not.
/**
* Initializes or doubles table size. If null, allocates in
* accord with initial capacity target held in field threshold.
* Otherwise, because we are using power-of-two expansion, the
* elements from each bin must either stay at same index, or move
* with a power of two offset in the new table.
*
* @return the table
*/
final Node<K,V>[] resize() {
// resize() either initializes the table or doubles it; both paths compute a new capacity and a new threshold
// When growing, capacity and threshold are both doubled; when initializing, threshold = capacity * load factor
// Only growing needs to migrate the existing entries
Node<K,V>[] oldTab = table;
int oldCap = (oldTab == null) ? 0 : oldTab.length;
int oldThr = threshold;
int newCap, newThr = 0;
if (oldCap > 0) {
if (oldCap >= MAXIMUM_CAPACITY) {
threshold = Integer.MAX_VALUE;
return oldTab;
}
else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
oldCap >= DEFAULT_INITIAL_CAPACITY)
newThr = oldThr << 1; // double threshold
}
else if (oldThr > 0) // initial capacity was placed in threshold
newCap = oldThr;
else { // zero initial threshold signifies using defaults
newCap = DEFAULT_INITIAL_CAPACITY;
newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
if (newThr == 0) {
float ft = (float)newCap * loadFactor;
newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
(int)ft : Integer.MAX_VALUE);
}
threshold = newThr;
@SuppressWarnings({"rawtypes","unchecked"})
Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
table = newTab;
// Only a real grow (old table non-null) has entries to migrate
if (oldTab != null) {
for (int j = 0; j < oldCap; ++j) {
Node<K,V> e;
if ((e = oldTab[j]) != null) {
oldTab[j] = null;
// A bin holding a single node can be placed into the new table directly
if (e.next == null)
newTab[e.hash & (newCap - 1)] = e;
else if (e instanceof TreeNode)
((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
else { // preserve order
Node<K,V> loHead = null, loTail = null;
Node<K,V> hiHead = null, hiTail = null;
Node<K,V> next;
do {
// Since JDK 8 migration appends at the tail (JDK 7 inserted at the head), avoiding the infinite loop seen under concurrent resizing; HashMap itself is still not thread-safe
next = e.next;
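// (e.hash & oldCap) == 0 means the newly unmasked bit is 0: the node stays at index j (the "lo" list); otherwise it moves to j + oldCap (the "hi" list)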
if ((e.hash & oldCap) == 0) {
if (loTail == null)
loHead = e;
else
loTail.next = e;
loTail = e;
}
else {
if (hiTail == null)
hiHead = e;
else
hiTail.next = e;
hiTail = e;
}
} while ((e = next) != null);
if (loTail != null) {
loTail.next = null;
newTab[j] = loHead;
}
if (hiTail != null) {
hiTail.next = null;
newTab[j + oldCap] = hiHead;
}
}
}
}
}
return newTab;
}
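A tiny stand-alone illustration (my own, not JDK code) of the lo/hi rule: doubling the capacity adds exactly one bit (of value oldCap) to the mask, so every node either keeps its old index or moves up by exactly oldCap, depending on that one bit of its hash.

public class ResizeSplitDemo {
    public static void main(String[] args) {
        int oldCap = 16, newCap = 32;
        int h1 = 5;   // the oldCap bit (value 16) is 0 in this hash
        int h2 = 21;  // the oldCap bit is 1 (21 = 16 + 5)
        System.out.println((h1 & (oldCap - 1)) + " -> " + (h1 & (newCap - 1))); // 5 -> 5   (lo: stays put)
        System.out.println((h2 & (oldCap - 1)) + " -> " + (h2 & (newCap - 1))); // 5 -> 21  (hi: old index + oldCap)
    }
}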
6. Splitting the red-black tree
During a resize, a tree bin may split into two lists: one staying at the low index and one moving to the high index (old index + oldCap). A half that has shrunk to UNTREEIFY_THRESHOLD (6) nodes or fewer is converted back into a plain linked list; a larger half is re-treeified. If every node ends up on one side (no real split), the existing red-black tree structure is still intact and no re-treeification is needed.
/**
* Splits nodes in a tree bin into lower and upper tree bins,
* or untreeifies if now too small. Called only from resize;
* see above discussion about split bits and indices.
*
* @param map the map
* @param tab the table for recording bin heads
* @param index the index of the table being split
* @param bit the bit of hash to split on
*/
final void split(HashMap<K,V> map, Node<K,V>[] tab, int index, int bit) {
TreeNode<K,V> b = this;
// Relink into lo and hi lists, preserving order
TreeNode<K,V> loHead = null, loTail = null;
TreeNode<K,V> hiHead = null, hiTail = null;
int lc = 0, hc = 0;
for (TreeNode<K,V> e = b, next; e != null; e = next) {
next = (TreeNode<K,V>)e.next;
e.next = null;
if ((e.hash & bit) == 0) {
if ((e.prev = loTail) == null)
loHead = e;
else
loTail.next = e;
loTail = e;
++lc;
}
else {
if ((e.prev = hiTail) == null)
hiHead = e;
else
hiTail.next = e;
hiTail = e;
++hc;
}
}
if (loHead != null) {
if (lc <= UNTREEIFY_THRESHOLD)
tab[index] = loHead.untreeify(map);
else {
tab[index] = loHead;
if (hiHead != null) // (else is already treeified)
// Only re-treeify when the bin really split; if hiHead is null, all nodes stayed in lo and the original red-black tree structure is still intact
loHead.treeify(tab);
}
}
if (hiHead != null) {
if (hc <= UNTREEIFY_THRESHOLD)
tab[index + bit] = hiHead.untreeify(map);
else {
tab[index + bit] = hiHead;
if (loHead != null)
hiHead.treeify(tab);
}
}
}
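To summarize the decision split() makes for each half, here is a hypothetical helper (my own paraphrase, not JDK code) mirroring the branches above, using the same UNTREEIFY_THRESHOLD value of 6:

public class SplitDecision {
    static final int UNTREEIFY_THRESHOLD = 6;   // same value the JDK uses

    // Hypothetical helper mirroring split()'s per-half decision.
    static String decide(int count, boolean otherHalfEmpty) {
        if (count == 0) return "no bin on this side";
        if (count <= UNTREEIFY_THRESHOLD) return "convert back to a plain linked list";
        return otherHalfEmpty ? "keep the existing tree unchanged" : "re-treeify this half";
    }

    public static void main(String[] args) {
        System.out.println(decide(4, false));    // small half             -> untreeify
        System.out.println(decide(10, false));   // large half, real split -> re-treeify
        System.out.println(decide(10, true));    // nothing split off      -> tree kept as-is
    }
}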