public ConcurrentHashMap(int initialCapacity, float loadFactor, int concurrencyLevel) { if (!(loadFactor > 0.0f) || initialCapacity < 0 || concurrencyLevel <= 0) throw new IllegalArgumentException(); if (initialCapacity < concurrencyLevel) // Use at least as many bins initialCapacity = concurrencyLevel; // as estimated threads long size = (long)(1.0 + (long)initialCapacity / loadFactor); int cap = (size >= (long)MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : tableSizeFor((int)size); this.sizeCtl = cap; }先是对负载因子、初始化容量、并行度三个参数验证,再保证初始化容量不小于并行度,然后通过初始化容量跟负载因子计算出map容量:size = (long)(1.0 + (long)initialCapacity / loadFactor),然后保证容量大小不大于MAXIMUM_CAPACITY并且是2的n次方。
/**
 * Returns a power-of-two table size for the requested capacity {@code c}.
 *
 * @param c requested capacity
 * @return 1 if the smeared value is negative, {@code MAXIMUM_CAPACITY} if it
 *         reaches that bound, otherwise the smeared value plus one
 */
private static final int tableSizeFor(int c) {
    int n = c - 1;
    // Smear the highest set bit into every lower position (shifts 1, 2, 4, 8, 16).
    for (int shift = 1; shift <= 16; shift <<= 1) {
        n |= n >>> shift;
    }
    if (n < 0) {
        return 1;
    }
    if (n >= MAXIMUM_CAPACITY) {
        return MAXIMUM_CAPACITY;
    }
    return n + 1;
}
通过位运算巧妙地算出不小于c的最小的2的n次方数值,作为map的容量。初始化操作仅仅计算了参数,并未对真正的存储结构进行初始化。
我们来看下put()操作。
/**
 * Maps {@code key} to {@code value}; delegates to
 * {@link #putVal} with {@code onlyIfAbsent == false}.
 */
public V put(K key, V value) {
    return putVal(key, value, false);
}

/**
 * Implementation for put and putIfAbsent.
 *
 * @param key          key to insert; must not be null
 * @param value        value to associate; must not be null
 * @param onlyIfAbsent if true, an existing value for the key is left unchanged
 * @return the value previously found for the key, or null if a new node was added
 * @throws NullPointerException if key or value is null
 */
final V putVal(K key, V value, boolean onlyIfAbsent) {
    if (key == null || value == null) throw new NullPointerException();
    int hash = spread(key.hashCode());
    int binCount = 0;
    // Retry loop: each pass re-reads the bin and exits only via break/return.
    for (Node<K,V>[] tab = table;;) {
        Node<K,V> f; int n, i, fh;
        // Case 1: no table yet -> create it, then retry.
        if (tab == null || (n = tab.length) == 0)
            tab = initTable();
        // Case 2: target bin is empty -> try to publish the new node with a CAS.
        else if ((f = tabAt(tab, i = (n - 1) & hash)) == null) {
            if (casTabAt(tab, i, null, new Node<K,V>(hash, key, value, null)))
                break;                   // no lock when adding to empty bin
            // CAS failed: fall through to the next loop pass and re-examine the bin.
        }
        // Case 3: bin head is marked MOVED -> join the transfer, continue on the returned table.
        else if ((fh = f.hash) == MOVED)
            tab = helpTransfer(tab, f);
        // Case 4: occupied bin -> lock its head node and update in place.
        else {
            V oldVal = null;
            synchronized (f) {
                // Re-check that f is still the bin head after acquiring the lock.
                if (tabAt(tab, i) == f) {
                    if (fh >= 0) {
                        // Non-negative head hash: walk the bin as a linked list,
                        // counting visited nodes in binCount.
                        binCount = 1;
                        for (Node<K,V> e = f;; ++binCount) {
                            K ek;
                            if (e.hash == hash &&
                                ((ek = e.key) == key ||
                                 (ek != null && key.equals(ek)))) {
                                // Key already present: remember the old value and
                                // overwrite it unless onlyIfAbsent is set.
                                oldVal = e.val;
                                if (!onlyIfAbsent)
                                    e.val = value;
                                break;
                            }
                            Node<K,V> pred = e;
                            if ((e = e.next) == null) {
                                // Reached the tail without a match: append a new node.
                                pred.next = new Node<K,V>(hash, key, value, null);
                                break;
                            }
                        }
                    }
                    else if (f instanceof TreeBin) {
                        Node<K,V> p;
                        binCount = 2;
                        // A non-null result is handled as an existing mapping for the key.
                        if ((p = ((TreeBin<K,V>)f).putTreeVal(hash, key, value)) != null) {
                            oldVal = p.val;
                            if (!onlyIfAbsent)
                                p.val = value;
                        }
                    }
                }
            }
            // binCount stays 0 when nothing was done under the lock; then the loop retries.
            if (binCount != 0) {
                if (binCount >= TREEIFY_THRESHOLD)
                    treeifyBin(tab, i);
                if (oldVal != null)
                    return oldVal;       // existing key: element count is unchanged
                break;
            }
        }
    }
    // A new node was added: bump the element count.
    addCount(1L, binCount);
    return null;
}
如果第一次调用put,此时tab为null,于是调用initTable()
/**
 * Initializes the table, using the size recorded in sizeCtl
 * (or DEFAULT_CAPACITY when sizeCtl is not positive).
 *
 * @return the table, created either by this thread or by a competing one
 */
private final Node<K,V>[] initTable() {
    Node<K,V>[] tab; int sc;
    while ((tab = table) == null || tab.length == 0) {
        // A negative sizeCtl means another thread holds the initialization slot.
        if ((sc = sizeCtl) < 0)
            Thread.yield(); // lost initialization race; just spin
        // Claim the slot by swapping sizeCtl to -1.
        else if (U.compareAndSwapInt(this, SIZECTL, sc, -1)) {
            try {
                // Re-check after winning the CAS: the table may already exist.
                if ((tab = table) == null || tab.length == 0) {
                    int n = (sc > 0) ? sc : DEFAULT_CAPACITY;
                    @SuppressWarnings("unchecked")
                    Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n];
                    table = tab = nt;
                    // n - n/4, i.e. three quarters of the table length.
                    sc = n - (n >>> 2);
                }
            } finally {
                // Always write sizeCtl back: the new value on success,
                // the previously read value otherwise.
                sizeCtl = sc;
            }
            break;
        }
    }
    return tab;
}
在initTable()方法中,先通过cas的方法将sizeCtl设为-1,表示table正在初始化过程中。然后创建sc大小(若sc不大于0,则为DEFAULT_CAPACITY)的Node数组来作为存储数据结构。创建完之后,将sc设为n - (n >>> 2),即默认负载因子下的扩容阈值(0.75*n)。最后把sizeCtl设为sc,表示初始化过程完成。
创建完之后,在下一轮循环时,通过(n - 1) & hash算出下标(n是2的n次方,所以等价于hash对n取余),再通过tabAt()取得node数组上对应位置的元素。
/**
 * Reads element {@code i} of {@code tab} through
 * {@code U.getObjectVolatile}.
 *
 * @param tab the table to read from
 * @param i   index of the element
 * @return the node currently stored at that index
 */
static final <K,V> Node<K,V> tabAt(Node<K,V>[] tab, int i) {
    // Raw offset of slot i: index scaled by ASHIFT, plus the array base offset.
    long slotOffset = ((long) i << ASHIFT) + ABASE;
    Object slot = U.getObjectVolatile(tab, slotOffset);
    return (Node<K,V>) slot;
}
其中ABASE是node数组中第一个元素的偏移量,(1 << ASHIFT)是每个元素所占的大小,也就是每一个元素相对前一个元素的偏移量,通过Unsafe的方法定位到table中相应位置。
Class<?> ak = Node[].class; ABASE = U.arrayBaseOffset(ak); int scale = U.arrayIndexScale(ak); if ((scale & (scale - 1)) != 0) throw new Error("data type scale not a power of two"); ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
得到相应位置后:如果这一位置上为null,那么直接通过cas将新创建的node赋值在该位置上;如果该位置的元素的hash为-1(MOVED),那么说明此map处于扩容阶段,于是通过helpTransfer()方法来帮助扩容;如果该位置已经存在相应的节点,那么对该节点加锁,从头开始查找,如果满足 e.hash == hash && ((ek = e.key) == key || (ek != null && key.equals(ek))),即找到了相应的key,那么(在onlyIfAbsent为false时)直接用新的value覆盖原来的value,否则创建一个新节点插入在链表的最后。如果这个数组上的元素已经是TreeBin类型,那么说明这里已经使用红黑树来存储,于是调用putTreeVal()来向红黑树中插入这一节点。
最后判断下该数组元素上的链表节点数是否已经达到8(binCount >= TREEIFY_THRESHOLD),如果达到,则通过treeifyBin()将链表转化为红黑树形式存储。最后addCount(1,binCount)将map内存储的元素个数加一,并判断是否需要扩容。
/**
 * Adds {@code x} to the element count and, depending on {@code check},
 * starts or joins a table transfer when the count has reached sizeCtl.
 *
 * @param x     the amount to add
 * @param check if negative, the resize check is skipped; if at most 1, the
 *              resize check is skipped when the count was updated through a
 *              counter cell
 */
private final void addCount(long x, int check) {
    CounterCell[] as; long b, s;
    // Fast path: CAS baseCount directly. Use the cell path if cells already
    // exist or the CAS on baseCount fails.
    if ((as = counterCells) != null ||
        !U.compareAndSwapLong(this, BASECOUNT, b = baseCount, s = b + x)) {
        CounterCell a; long v; int m;
        boolean uncontended = true;
        // Pick a cell by the thread's probe value and CAS its value; hand off
        // to fullAddCount if there is no usable cell or the CAS fails.
        if (as == null || (m = as.length - 1) < 0 ||
            (a = as[ThreadLocalRandom.getProbe() & m]) == null ||
            !(uncontended =
              U.compareAndSwapLong(a, CELLVALUE, v = a.value, v + x))) {
            fullAddCount(x, uncontended);
            return;
        }
        if (check <= 1)
            return;
        s = sumCount();
    }
    if (check >= 0) {
        Node<K,V>[] tab, nt; int n, sc;
        // Loop while the count has reached sizeCtl and the table exists and
        // is still below MAXIMUM_CAPACITY.
        while (s >= (long)(sc = sizeCtl) && (tab = table) != null &&
               (n = tab.length) < MAXIMUM_CAPACITY) {
            int rs = resizeStamp(n);
            if (sc < 0) {
                // Negative sizeCtl: a transfer is already recorded there.
                // Give up if any of these checks fails; otherwise register as
                // one more helper (sc + 1) and join the transfer.
                if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 ||
                    sc == rs + MAX_RESIZERS || (nt = nextTable) == null ||
                    transferIndex <= 0)
                    break;
                if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1))
                    transfer(tab, nt);
            }
            // No transfer in progress: install (stamp << shift) + 2 and start one.
            else if (U.compareAndSwapInt(this, SIZECTL, sc,
                                         (rs << RESIZE_STAMP_SHIFT) + 2))
                transfer(tab, null);
            s = sumCount();
        }
    }
}
先通过cas将刚才put方法插入的元素个数加到map的元素个数上(baseCount字段,即map中现有元素个数),然后比较元素个数与扩容阈值sizeCtl的大小关系,如果元素个数不小于sizeCtl,则map需要开始扩容。
/**
 * Builds the stamp value for resizing a table of size {@code n}.
 *
 * @param n the table size
 * @return the number of leading zero bits of {@code n}, OR-ed with
 *         bit {@code RESIZE_STAMP_BITS - 1}
 */
static final int resizeStamp(int n) {
    int markerBit = 1 << (RESIZE_STAMP_BITS - 1);
    int leadingZeros = Integer.numberOfLeadingZeros(n);
    return leadingZeros | markerBit;
}
先通过调用resizeStamp()计算rs:它是扩容前数组大小n在32位表示下高位0的个数,与2的15次方(即1 << (RESIZE_STAMP_BITS - 1))相或的结果。
在扩容的时候,会把sizeCtl设为-n,所以如果此时发现sizeCtl<0,那么说明已经有别的线程开始扩容;如果没有,那表示本线程是这情况下第一个开始扩容的线程,于是在扩容前使用cas更新sizeCtl的大小为rs左移16位加2。(此时sizeCtl = 扩容前大小在32位下高位0的个数与2的15次相或的结果,之后继续左移16位再加2)这样生成的标志来代表本次扩容,防止扩容重叠,(此时sizeCtl是一个负数,很有意思的是它的低RESIZE_STAMP_SHIFT位的数减一就是参与扩容的线程数)。设置完sizeCtl后,调用transfer开始扩容。
/**
 * Moves the nodes of each bin of {@code tab} into {@code nextTab},
 * claiming index ranges through transferIndex.
 *
 * @param tab     the current table
 * @param nextTab the new table, or null if this call should allocate it
 */
private final void transfer(Node<K,V>[] tab, Node<K,V>[] nextTab) {
    int n = tab.length, stride;
    // Number of bins claimed per step; never smaller than MIN_TRANSFER_STRIDE.
    if ((stride = (NCPU > 1) ? (n >>> 3) / NCPU : n) < MIN_TRANSFER_STRIDE)
        stride = MIN_TRANSFER_STRIDE; // subdivide range
    if (nextTab == null) {            // initiating
        try {
            // Allocate the new table at twice the current length.
            @SuppressWarnings("unchecked")
            Node<K,V>[] nt = (Node<K,V>[])new Node<?,?>[n << 1];
            nextTab = nt;
        } catch (Throwable ex) {      // try to cope with OOME
            sizeCtl = Integer.MAX_VALUE;
            return;
        }
        nextTable = nextTab;
        transferIndex = n;
    }
    int nextn = nextTab.length;
    // Marker node stored into every processed bin of the old table.
    ForwardingNode<K,V> fwd = new ForwardingNode<K,V>(nextTab);
    boolean advance = true;
    boolean finishing = false; // to ensure sweep before committing nextTab
    for (int i = 0, bound = 0;;) {
        Node<K,V> f; int fh;
        // Pick the next index: step down inside the claimed range, or claim
        // a new range [nextBound, nextIndex) by CAS on transferIndex.
        while (advance) {
            int nextIndex, nextBound;
            if (--i >= bound || finishing)
                advance = false;
            else if ((nextIndex = transferIndex) <= 0) {
                // Nothing left to claim.
                i = -1;
                advance = false;
            }
            else if (U.compareAndSwapInt
                     (this, TRANSFERINDEX, nextIndex,
                      nextBound = (nextIndex > stride ?
                                   nextIndex - stride : 0))) {
                bound = nextBound;
                i = nextIndex - 1;
                advance = false;
            }
        }
        if (i < 0 || i >= n || i + n >= nextn) {
            int sc;
            if (finishing) {
                // Commit: publish the new table and set sizeCtl to 2n - n/2.
                nextTable = null;
                table = nextTab;
                sizeCtl = (n << 1) - (n >>> 1);
                return;
            }
            // Deregister this thread by decrementing sizeCtl.
            if (U.compareAndSwapInt(this, SIZECTL, sc = sizeCtl, sc - 1)) {
                // If the value read was not (stamp << shift) + 2, just leave;
                // otherwise this thread goes on to finish the transfer.
                if ((sc - 2) != resizeStamp(n) << RESIZE_STAMP_SHIFT)
                    return;
                finishing = advance = true;
                i = n; // recheck before commit
            }
        }
        // Empty bin: try to install the forwarding node.
        else if ((f = tabAt(tab, i)) == null)
            advance = casTabAt(tab, i, null, fwd);
        else if ((fh = f.hash) == MOVED)
            advance = true; // already processed
        else {
            synchronized (f) {
                // Re-check that f is still the bin head after locking it.
                if (tabAt(tab, i) == f) {
                    Node<K,V> ln, hn;
                    if (fh >= 0) {
                        // List bin. Find lastRun: the start of the trailing run of
                        // nodes that all share the same (hash & n) bit; that run
                        // is reused as-is.
                        int runBit = fh & n;
                        Node<K,V> lastRun = f;
                        for (Node<K,V> p = f.next; p != null; p = p.next) {
                            int b = p.hash & n;
                            if (b != runBit) {
                                runBit = b;
                                lastRun = p;
                            }
                        }
                        if (runBit == 0) {
                            ln = lastRun;
                            hn = null;
                        }
                        else {
                            hn = lastRun;
                            ln = null;
                        }
                        // Copy the nodes before lastRun, prepending each to the
                        // low list (bit clear) or the high list (bit set).
                        for (Node<K,V> p = f; p != lastRun; p = p.next) {
                            int ph = p.hash; K pk = p.key; V pv = p.val;
                            if ((ph & n) == 0)
                                ln = new Node<K,V>(ph, pk, pv, ln);
                            else
                                hn = new Node<K,V>(ph, pk, pv, hn);
                        }
                        // Low list goes to index i, high list to i + n; then mark
                        // the old bin as processed.
                        setTabAt(nextTab, i, ln);
                        setTabAt(nextTab, i + n, hn);
                        setTabAt(tab, i, fwd);
                        advance = true;
                    }
                    else if (f instanceof TreeBin) {
                        // Tree bin: copy nodes into low/high TreeNode lists,
                        // counting each side.
                        TreeBin<K,V> t = (TreeBin<K,V>)f;
                        TreeNode<K,V> lo = null, loTail = null;
                        TreeNode<K,V> hi = null, hiTail = null;
                        int lc = 0, hc = 0;
                        for (Node<K,V> e = t.first; e != null; e = e.next) {
                            int h = e.hash;
                            TreeNode<K,V> p = new TreeNode<K,V>
                                (h, e.key, e.val, null, null);
                            if ((h & n) == 0) {
                                if ((p.prev = loTail) == null)
                                    lo = p;
                                else
                                    loTail.next = p;
                                loTail = p;
                                ++lc;
                            }
                            else {
                                if ((p.prev = hiTail) == null)
                                    hi = p;
                                else
                                    hiTail.next = p;
                                hiTail = p;
                                ++hc;
                            }
                        }
                        // A side with at most UNTREEIFY_THRESHOLD nodes is passed to
                        // untreeify; if the other side is empty the original
                        // TreeBin t is reused; otherwise a new TreeBin is built.
                        ln = (lc <= UNTREEIFY_THRESHOLD) ? untreeify(lo) :
                            (hc != 0) ? new TreeBin<K,V>(lo) : t;
                        hn = (hc <= UNTREEIFY_THRESHOLD) ? untreeify(hi) :
                            (lc != 0) ? new TreeBin<K,V>(hi) : t;
                        setTabAt(nextTab, i, ln);
                        setTabAt(nextTab, i + n, hn);
                        setTabAt(tab, i, fwd);
                        advance = true;
                    }
                }
            }
        }
    }
}
在transfer()方法中,先根据原本node数组大小,生成一个新的原数组两倍大小的node数组。之后生成一个新的ForwardingNode节点,其中nextTable字段存放新的node数组,重点是该hash值设为-1(后面该节点可以作为一个“占位”,-1可以表示该节点所占的位置处于扩容状态)。
ForwardingNode<K,V> fwd = new ForwardingNode<K,V>(nextTab); static final class ForwardingNode<K,V> extends Node<K,V> { final Node<K,V>[] nextTable; ForwardingNode(Node<K,V>[] tab) { super(MOVED, null, null, null); this.nextTable = tab; }
后面通过while循环来控制原数组的下标i来取出相应的链进行具体操作,通过控制advance表示是否进入循环进行相应的--i操作,第一次进入时,i的大小为原数组的大小-1,即指向原数组的最后一个元素。通过下标i取得原数组的元素:
如果该元素是null,那么直接把之前生成的ForwardingNode节点通过cas赋值在原数组的这一位置上,表示处理过,然后改变advance的值进入下一循环--i,取得下一元素;
如果该元素的hash为-1,说明已经处理过,那么改变advance的值进入下一循环--i,取得下一元素;
如果该元素是简单的链表类型的node,那么要开始在这条链上复制元素至新的node数组中。由于node数组大小是2的n次方,且每次扩容均是乘2,那么hash%新数组大小,要么在原位置i,要么在原位置i再加上原数组长度n即(i+n),于是通过判断hash与原数组大小相与是否为0( (ph & n) == 0),为0则在原位置,不为0(即等于n)则在原位置再加上原数组长度的位置。
如果该元素是红黑树的节点,与上面情况类似,根据其hash与原本数组大小相与的结果来判断其在新数组中的位置,拆成两棵红黑树。由于拆分后每棵树的节点数目减少,所以需要判断节点数目是否不大于UNTREEIFY_THRESHOLD,若是,则应转化为链表。
操作完毕后,将原数组相应位置上赋值为ForwardingNode节点,表示操作完毕,告知其他线程该节点扩容情况。
遍历所有节点完毕后,我们看下收尾的代码。
int sc; if (finishing) { nextTable = null; table = nextTab; sizeCtl = (n << 1) - (n >>> 1); return; } if (U.compareAndSwapInt(this, SIZECTL, sc = sizeCtl, sc - 1)) { if ((sc - 2) != resizeStamp(n) << RESIZE_STAMP_SHIFT) return; finishing = advance = true; i = n; // recheck before commit }
这一轮循环finishing为false,进入到下一条件中,由于线程要离开,于是通过cas将sizeCtl减一,同时sc保留着原sizeCtl的值。每个参与扩容的线程进入时都会将sizeCtl加一,因此只有最后一个离开的线程,它的sc才等于发起扩容时设置的值(rs左移16位加2),此时等号成立,于是finishing为true,且advance为true进入下一轮循环,i=n保证下轮循环能再次进入这块代码段(并在提交前重新检查一遍)。第二次循环直接进入到finishing里面,将map的table赋值为新的node数组,sizeCtl通过原大小左移一位减去右移一位的结果(即新容量2n的0.75倍)来表示默认负载因子下的扩容阈值。若某个线程离开时还有其他线程在参与扩容,那么它的sc大于该值,等号不成立,直接退出。
int rs = resizeStamp(n); if (sc < 0) { if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 || sc == rs + MAX_RESIZERS || (nt = nextTable) == null || transferIndex <= 0) break; if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1)) transfer(tab, nt); } else if (U.compareAndSwapInt(this, SIZECTL, sc, (rs << RESIZE_STAMP_SHIFT) + 2)) transfer(tab, null);
我们看下上面的逻辑,它是对sc与rs的检验:sc无符号右移16位后与rs相等,说明处于同一轮的扩容(从n -> 2n);通过cas将sc加一,代表参与扩容的线程数加一,然后调用transfer(tab,nt)方法开始参与帮助扩容。
说明一下:这里通过rs跟sc的关系来判断是否处于同一轮扩容,通过sc的低16位控制参与扩容的线程数,用数据关系保证仅有一个线程参与新的node数组的生成,多个线程参与节点的复制,通过ForwardingNode节点互相告知节点扩容情况。最后也仅保证一个线程参与map的table的赋值。实现很有意思,构造巧妙的sizeCtl,巧妙的位运算。
我们之前有看到put的时候,如果节点的hash为-1,那么处于扩容状态,于是调用helpTransfer()来帮助扩容。
/**
 * Joins an in-progress transfer if {@code f} is a forwarding node
 * that carries a next table.
 *
 * @param tab the table in which {@code f} was found
 * @param f   the bin head that was found
 * @return the next table taken from {@code f} when it is a ForwardingNode
 *         with a non-null next table; otherwise the current {@code table}
 */
final Node<K,V>[] helpTransfer(Node<K,V>[] tab, Node<K,V> f) {
    Node<K,V>[] nextTab; int sc;
    if (tab != null && (f instanceof ForwardingNode) &&
        (nextTab = ((ForwardingNode<K,V>)f).nextTable) != null) {
        int rs = resizeStamp(tab.length);
        // Keep trying while the same transfer is still recorded in the fields
        // (same nextTable, same table, negative sizeCtl).
        while (nextTab == nextTable && table == tab &&
               (sc = sizeCtl) < 0) {
            // Stop without helping if any of these checks fails.
            if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 ||
                sc == rs + MAX_RESIZERS || transferIndex <= 0)
                break;
            // Register as one more helper (sc + 1), then take part in the transfer.
            if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1)) {
                transfer(tab, nextTab);
                break;
            }
        }
        return nextTab;
    }
    return table;
}
这块的逻辑基本上与addCount中的部分逻辑基本一样,需要实现的功能基本一样,只是参与的时机不同。
我们再来看下get()的逻辑
/**
 * Looks up the value for {@code key} without taking any lock.
 *
 * @param key the key to look up; {@code key.hashCode()} is called
 *            unconditionally, so a null key throws NullPointerException
 * @return the value found for the key, or null if none is found
 */
public V get(Object key) {
    Node<K,V>[] tab; Node<K,V> e, p; int n, eh; K ek;
    int h = spread(key.hashCode());
    // Only search when the table exists, is non-empty and the target bin has a head.
    if ((tab = table) != null && (n = tab.length) > 0 &&
        (e = tabAt(tab, (n - 1) & h)) != null) {
        // Head node has the same hash: compare keys by identity, then equals.
        if ((eh = e.hash) == h) {
            if ((ek = e.key) == key || (ek != null && key.equals(ek)))
                return e.val;
        }
        // Negative head hash: delegate the search to the node's own find().
        else if (eh < 0)
            return (p = e.find(h, key)) != null ? p.val : null;
        // Otherwise walk the remaining nodes of the bin.
        while ((e = e.next) != null) {
            if (e.hash == h &&
                ((ek = e.key) == key || (ek != null && key.equals(ek))))
                return e.val;
        }
    }
    return null;
}
先找到table[i]节点,然后比较链表头如果是则返回,如果该节点为红黑树,那么查找红黑树。如果是链表且链表头不是要找的,那么遍历链表查找。这部分并没有加锁,很显然在多线程操作的过程中,并不能完全的保证一致性。
有一点之前没注意到:int hash = spread(key.hashCode())
/**
 * XORs the upper 16 bits of {@code h} into its lower 16 bits and
 * masks the result with {@code HASH_BITS}.
 *
 * @param h the raw hash code
 * @return the spread hash
 */
static final int spread(int h) {
    int folded = h ^ (h >>> 16);
    return folded & HASH_BITS;
}
使用代价最小的异或操作降低散列在树结构中的碰撞,将较高的散列位数降低,最高位为0。算是一个小的技巧。