From 70c81c4cd532e86f1baca84f8a54a54f495c511e Mon Sep 17 00:00:00 2001
From: Charles Blake
Date: Tue, 31 Mar 2020 09:25:35 -0400
Subject: [PATCH 1/2] Unwind just the "pseudorandom probing" (whole-hash-code-keyed
 variable-stride double hashing) part of the recent sets & tables changes

That part is still causing bugs over a month later (e.g., two days ago:
https://github.com/nim-lang/Nim/issues/13794) and still contains several
"figure this out" implementation-question comments (see just the diffs of
this PR).

This topic has been discussed in many places:
  https://github.com/nim-lang/Nim/issues/13393
  https://github.com/nim-lang/Nim/pull/13418
  https://github.com/nim-lang/Nim/pull/13440
  https://github.com/nim-lang/Nim/issues/13794

Optional stronger integer hashes (or, conversely, opt-in identity hashes)
are a better and more general solution: they create no illusion that one
hard-coded probe sequence solves all problems, and they retain the virtues
of linear probing, such as cache obliviousness and tables that do not age
under delete-heavy workloads (a property still untested after a month of
this change). The only real solution for truly adversarial keys is a hash
keyed off of data unobservable to attackers. All of this fits better with a
few families of user-pluggable/define-switchable hashes, which can be
provided in a separate PR focused on `hashes.nim`.

This PR carefully preserves the better (but still hard-coded!) probing of
`intsets`, as well as other recent fixes: the `move` annotations, the
hash-order-invariant tests, the `intsets.missingOrExcl` fix, and the move
of `rightSize` into `hashcommon.nim`.
---
 lib/pure/collections/hashcommon.nim   | 80 +++++-----------------
 lib/pure/collections/intsets.nim      |  8 +--
 lib/pure/collections/setimpl.nim      | 28 +++++---
 lib/pure/collections/sets.nim         | 14 ++--
 lib/pure/collections/sharedtables.nim |  4 +-
 lib/pure/collections/tableimpl.nim    | 42 +++++++-----
 lib/pure/collections/tables.nim       | 96 ++++++++-------------------
 tests/arc/trepr.nim                   |  2 +-
 8 files changed, 99 insertions(+), 175 deletions(-)

diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim
index bfc09ec3cc92..851b6cae94e4 100644
--- a/lib/pure/collections/hashcommon.nim
+++ b/lib/pure/collections/hashcommon.nim
@@ -18,87 +18,43 @@ when not defined(nimHasDefault):
     var v: T
     v
 
-const freeMarker = 0
-const deletedMarker = -1
-
-type UHash = uint
-
 # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
 # two procs retain clarity of that encoding without the space cost of an enum.
-proc isFilledAndValid(hcode: Hash): bool {.inline.} =
-  result = hcode != 0 and hcode != deletedMarker
-  # performance: we could use bit magic if needed
+proc isEmpty(hcode: Hash): bool {.inline.} =
+  result = hcode == 0
 
 proc isFilled(hcode: Hash): bool {.inline.} =
   result = hcode != 0
-
-proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
-  result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))
-
-proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
-  # FACTOR between hashcommon.nextTry, intsets.nextTry
-  # an optimization would be to use `(h + 1) and maxHash` for a few iterations
-  # and then switch to the formula below, to get "best of both worlds": good
-  # cache locality, except when a collision cluster is detected (ie, large number
-  # of iterations).
- const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)` - result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash)) - perturb = perturb shr PERTURB_SHIFT +proc nextTry(h, maxHash: Hash): Hash {.inline.} = + result = (h + 1) and maxHash proc mustRehash[T](t: T): bool {.inline.} = - # FACTOR between hashcommon.mustRehash, intsets.mustRehash - let counter2 = t.counter + t.countDeleted - let length = t.dataLen - assert(length > counter2) - result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize` + assert(t.data.len > t.counter) + result = (t.data.len * 2 < t.counter * 3) or (t.data.len - t.counter < 4) proc rightSize*(count: Natural): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. + ## Return the value of ``initialSize`` to support ``count`` items. ## ## If more items are expected to be added, simply add that ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want `mustRehash(t) == false` for t that was just resized. + # # Make sure to synchronize with `mustRehash` result = nextPowerOfTwo(count * 3 div 2 + 4) -template getPerturb(t: typed, hc: Hash): UHash = - # we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors - # so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines) - let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2 - # this makes a major difference for cases like #13393; it causes the bits - # that were masked out in 1st position so they'll be masked in instead, and - # influence the recursion in nextTry earlier rather than later. - translateBits(cast[uint](hc), numBitsMask) - template rawGetKnownHCImpl() {.dirty.} = if t.dataLen == 0: return -1 var h: Hash = hc and maxHash(t) # start with real hash value - var perturb = t.getPerturb(hc) - var deletedIndex = -1 - while true: - if isFilledAndValid(t.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra Hash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. - # performance: we optimize this: depending on type(key), skip hc comparison - if t.data[h].hcode == hc and t.data[h].key == key: - return h - h = nextTry(h, maxHash(t), perturb) - elif t.data[h].hcode == deletedMarker: - if deletedIndex == -1: - deletedIndex = h - h = nextTry(h, maxHash(t), perturb) - else: - break - if deletedIndex == -1: - result = -1 - h # < 0 => MISSING; insert idx = -1 - result - else: - # we prefer returning a (in fact the 1st found) deleted index - result = -1 - deletedIndex + while isFilled(t.data[h].hcode): + # Compare hc THEN key with boolean short circuit. This makes the common case + # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. + # It does slow down succeeding lookups by one extra Hash cmp&and..usually + # just a few clock cycles, generally worth it for any non-integer-like A. + if t.data[h].hcode == hc and t.data[h].key == key: + return h + h = nextTry(h, maxHash(t)) + result = -1 - h # < 0 => MISSING; insert idx = -1 - result proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} = rawGetKnownHCImpl() @@ -107,8 +63,6 @@ template genHashImpl(key, hc: typed) = hc = hash(key) if hc == 0: # This almost never taken branch should be very predictable. 
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. - elif hc == deletedMarker: - hc = 214159261 template genHash(key: typed): Hash = var res: Hash diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 1967a01498d0..eae3fa447126 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -46,20 +46,16 @@ type IntSet* = object ## An efficient set of `int` implemented as a sparse bit set. elems: int # only valid for small numbers counter, max: int - countDeleted: int head: PTrunk data: TrunkSeq a: array[0..33, int] # profiling shows that 34 elements are enough proc mustRehash[T](t: T): bool {.inline.} = - # FACTOR between hashcommon.mustRehash, intsets.mustRehash - let counter2 = t.counter + t.countDeleted let length = t.max + 1 - assert length > counter2 - result = (length * 2 < counter2 * 3) or (length - counter2 < 4) + assert length > t.counter + result = (length * 2 < t.counter * 3) or (length - t.counter < 4) proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = - # FACTOR between hashcommon.nextTry, intsets.nextTry const PERTURB_SHIFT = 5 var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT perturb = cast[Hash](perturb2) diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim index 9e68cb0722b4..d798cbcb3d6b 100644 --- a/lib/pure/collections/setimpl.nim +++ b/lib/pure/collections/setimpl.nim @@ -35,11 +35,10 @@ proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, proc enlarge[A](s: var HashSet[A]) = var n: KeyValuePairSeq[A] - newSeq(n, s.counter.rightSize) + newSeq(n, len(s.data) * growthFactor) swap(s.data, n) # n is now old seq - s.countDeleted = 0 for i in countup(0, high(n)): - if isFilledAndValid(n[i].hcode): + if isFilled(n[i].hcode): var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode) rawInsert(s, s.data, n[i].key, n[i].hcode, j) @@ -69,6 +68,11 @@ template containsOrInclImpl() {.dirty.} = rawInsert(s, s.data, key, hc, -1 - index) inc(s.counter) +template doWhile(a, b) = + while true: + b + if not a: break + proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} = var hc: Hash var i = rawGet(s, key, hc) @@ -78,9 +82,17 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} = if i >= 0: result = false dec(s.counter) - inc(s.countDeleted) - s.data[i].hcode = deletedMarker - s.data[i].key = default(type(s.data[i].key)) + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. 
+ s.data[i].hcode = 0 # mark current EMPTY + s.data[i].key = default(type(s.data[i].key)) + doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + i = (i + 1) and msk # increment mod table size + if isEmpty(s.data[i].hcode): # end of collision cluster; So all done + return + r = s.data[i].hcode and msk # "home" location of key@i + s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop template dollarImpl() {.dirty.} = result = "{" @@ -113,7 +125,7 @@ proc enlarge[A](s: var OrderedSet[A]) = swap(s.data, n) while h >= 0: var nxt = n[h].next - if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used + if isFilled(n[h].hcode): var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) rawInsert(s, s.data, n[h].key, n[h].hcode, j) h = nxt @@ -131,7 +143,7 @@ proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} = result = true while h >= 0: var nxt = n[h].next - if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used + if isFilled(n[h].hcode): if n[h].hcode == hc and n[h].key == key: dec s.counter result = false diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index caa25dbb31e2..8c04a05e3c85 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -68,7 +68,6 @@ type ## before calling other procs on it. data: KeyValuePairSeq[A] counter: int - countDeleted: int type OrderedKeyValuePair[A] = tuple[ @@ -81,7 +80,6 @@ type ## <#initOrderedSet,int>`_ before calling other procs on it. data: OrderedKeyValuePairSeq[A] counter, first, last: int - countDeleted: int const defaultInitialSize* = 64 @@ -249,7 +247,7 @@ iterator items*[A](s: HashSet[A]): A = ## echo b ## # --> {(a: 1, b: 3), (a: 0, b: 4)} for h in 0 .. high(s.data): - if isFilledAndValid(s.data[h].hcode): yield s.data[h].key + if isFilled(s.data[h].hcode): yield s.data[h].key proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = ## Includes `key` in the set `s` and tells if `key` was already in `s`. @@ -341,7 +339,7 @@ proc pop*[A](s: var HashSet[A]): A = doAssertRaises(KeyError, echo s.pop) for h in 0 .. high(s.data): - if isFilledAndValid(s.data[h].hcode): + if isFilled(s.data[h].hcode): result = s.data[h].key excl(s, result) return result @@ -574,8 +572,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = proc hash*[A](s: HashSet[A]): Hash = ## Hashing of HashSet. for h in 0 .. 
high(s.data): - if isFilledAndValid(s.data[h].hcode): - result = result xor s.data[h].hcode + result = result xor s.data[h].hcode result = !$result proc `$`*[A](s: HashSet[A]): string = @@ -593,6 +590,7 @@ proc `$`*[A](s: HashSet[A]): string = ## # --> {no, esc'aping, is " provided} dollarImpl() + proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated: "Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize) @@ -624,7 +622,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var idx = 0 while h >= 0: var nxt = s.data[h].next - if isFilledAndValid(s.data[h].hcode): + if isFilled(s.data[h].hcode): yieldStmt inc(idx) h = nxt @@ -858,7 +856,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool = while h >= 0 and g >= 0: var nxh = s.data[h].next var nxg = t.data[g].next - if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode): + if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode): if s.data[h].key == t.data[g].key: inc compared else: diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim index 27ac5e84f7e3..2a1c0543f3b3 100644 --- a/lib/pure/collections/sharedtables.nim +++ b/lib/pure/collections/sharedtables.nim @@ -25,7 +25,6 @@ type SharedTable*[A, B] = object ## generic hash SharedTable data: KeyValuePairSeq[A, B] counter, dataLen: int - countDeleted: int lock: Lock template maxHash(t): untyped = t.dataLen-1 @@ -50,10 +49,9 @@ proc enlarge[A, B](t: var SharedTable[A, B]) = for i in 0..= 0: dec(t.counter) - inc(t.countDeleted) - t.data[i].hcode = deletedMarker - t.data[i].key = default(type(t.data[i].key)) - t.data[i].val = default(type(t.data[i].val)) - # mustRehash + enlarge not needed because counter+countDeleted doesn't change + block outer: + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. + t.data[i].hcode = 0 # mark current EMPTY + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) + while true: + i = (i + 1) and msk # increment mod table size + if isEmpty(t.data[i].hcode): # end of collision cluster; So all done + break outer + r = t.data[i].hcode and msk # "home" location of key@i + if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + break + when defined(js): + t.data[j] = t.data[i] + else: + t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop template delImpl() {.dirty.} = var hc: Hash @@ -108,7 +115,6 @@ template clearImpl() {.dirty.} = t.counter = 0 template ctAnd(a, b): bool = - # pending https://github.com/nim-lang/Nim/issues/13502 when a: when b: true else: false @@ -126,7 +132,7 @@ template initImpl(result: typed, size: int) = result.last = -1 template insertImpl() = # for CountTable - checkIfInitialized() + if t.dataLen == 0: initImpl(t, defaultInitialSize) if mustRehash(t): enlarge(t) ctRawInsert(t, t.data, key, val) inc(t.counter) diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index b79e396fd176..131e0ad80360 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -233,7 +233,6 @@ type ## For creating an empty Table, use `initTable proc<#initTable,int>`_. data: KeyValuePairSeq[A, B] counter: int - countDeleted: int TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_. 
## ## For creating a new empty TableRef, use `newTable proc @@ -266,16 +265,14 @@ template get(t, key): untyped = proc enlarge[A, B](t: var Table[A, B]) = var n: KeyValuePairSeq[A, B] - newSeq(n, t.counter.rightSize) + newSeq(n, len(t.data) * growthFactor) swap(t.data, n) - t.countDeleted = 0 for i in countup(0, high(n)): let eh = n[i].hcode - if isFilledAndValid(eh): + if isFilled(eh): var j: Hash = eh and maxHash(t) - var perturb = t.getPerturb(eh) while isFilled(t.data[j].hcode): - j = nextTry(j, maxHash(t), perturb) + j = nextTry(j, maxHash(t)) when defined(js): rawInsert(t, t.data, n[i].key, n[i].val, eh, j) else: @@ -662,7 +659,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) = ## # value: [1, 5, 7, 9] let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -684,7 +681,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -705,7 +702,7 @@ iterator keys*[A, B](t: Table[A, B]): A = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].key assert(len(t) == L, "the length of the table changed while iterating over it") @@ -726,7 +723,7 @@ iterator values*[A, B](t: Table[A, B]): B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -748,27 +745,10 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") -template hasKeyOrPutCache(cache, h): bool = - # using `IntSet` would be an option worth considering to avoid quadratic - # behavior in case user misuses Table with lots of duplicate keys; but it - # has overhead in the common case of small number of duplicates. - # However: when lots of duplicates are used, all operations would be slow - # anyway because the full `hash(key)` is identical for these, which makes - # `nextTry` follow the exact same path for each key, resulting in large - # collision clusters. Alternatives could involve modifying the hash/retrieval - # based on duplicate key count. - var ret = false - for hi in cache: - if hi == h: - ret = true - break - if not ret: cache.add h - ret - iterator allValues*[A, B](t: Table[A, B]; key: A): B = ## Iterates over any value in the table ``t`` that belongs to the given ``key``. 
## @@ -782,20 +762,13 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B = for i in 1..3: a.add('z', 10*i) doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)] doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30] - - let hc = genHash(key) - var h: Hash = hc and high(t.data) + var h: Hash = genHash(key) and high(t.data) let L = len(t) - var perturb = t.getPerturb(hc) - - var num = 0 - var cache: seq[Hash] - while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues` - if t.data[h].hcode == hc and t.data[h].key == key: - if not hasKeyOrPutCache(cache, h): - yield t.data[h].val - assert(len(t) == L, "the length of the table changed while iterating over it") - h = nextTry(h, high(t.data), perturb) + while isFilled(t.data[h].hcode): + if t.data[h].key == key: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + h = nextTry(h, high(t.data)) @@ -1118,7 +1091,7 @@ iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = ## # value: [1, 5, 7, 9] let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1140,7 +1113,7 @@ iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1161,7 +1134,7 @@ iterator keys*[A, B](t: TableRef[A, B]): A = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].key assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1182,7 +1155,7 @@ iterator values*[A, B](t: TableRef[A, B]): B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1203,7 +1176,7 @@ iterator mvalues*[A, B](t: TableRef[A, B]): var B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1229,8 +1202,6 @@ type ## <#initOrderedTable,int>`_. data: OrderedKeyValuePairSeq[A, B] counter, first, last: int - countDeleted: int - OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of ## `OrderedTable<#OrderedTable>`_. 
## @@ -1252,7 +1223,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int = proc rawInsert[A, B](t: var OrderedTable[A, B], data: var OrderedKeyValuePairSeq[A, B], key: A, val: B, hc: Hash, h: Hash) = - rawInsertImpl(t) + rawInsertImpl() data[h].next = -1 if t.first < 0: t.first = h if t.last >= 0: data[t.last].next = h @@ -1260,20 +1231,18 @@ proc rawInsert[A, B](t: var OrderedTable[A, B], proc enlarge[A, B](t: var OrderedTable[A, B]) = var n: OrderedKeyValuePairSeq[A, B] - newSeq(n, t.counter.rightSize) + newSeq(n, len(t.data) * growthFactor) var h = t.first t.first = -1 t.last = -1 swap(t.data, n) - t.countDeleted = 0 while h >= 0: var nxt = n[h].next let eh = n[h].hcode - if isFilledAndValid(eh): + if isFilled(eh): var j: Hash = eh and maxHash(t) - var perturb = t.getPerturb(eh) while isFilled(t.data[j].hcode): - j = nextTry(j, maxHash(t), perturb) + j = nextTry(j, maxHash(t)) rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j) h = nxt @@ -1282,10 +1251,6 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var h = t.first while h >= 0: var nxt = t.data[h].next - # For OrderedTable/OrderedTableRef, isFilled is ok because `del` is O(n) - # and doesn't create tombsones, but if it does start using tombstones, - # carefully replace `isFilled` by `isFilledAndValid` as appropriate for these - # table types only, ditto with `OrderedSet`. if isFilled(t.data[h].hcode): yieldStmt h = nxt @@ -2229,7 +2194,6 @@ type ## <#initCountTable,int>`_. data: seq[tuple[key: A, val: int]] counter: int - countDeleted: int isSorted: bool CountTableRef*[A] = ref CountTable[A] ## Ref version of ## `CountTable<#CountTable>`_. @@ -2242,10 +2206,8 @@ type proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], key: A, val: int) = - let hc = hash(key) - var perturb = t.getPerturb(hc) - var h: Hash = hc and high(data) - while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker + var h: Hash = hash(key) and high(data) + while data[h].val != 0: h = nextTry(h, high(data)) data[h].key = key data[h].val = val @@ -2272,12 +2234,10 @@ proc remove[A](t: var CountTable[A], key: A) = proc rawGet[A](t: CountTable[A], key: A): int = if t.data.len == 0: return -1 - let hc = hash(key) - var perturb = t.getPerturb(hc) - var h: Hash = hc and high(t.data) # start with real hash value - while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker? + var h: Hash = hash(key) and high(t.data) # start with real hash value + while t.data[h].val != 0: if t.data[h].key == key: return h - h = nextTry(h, high(t.data), perturb) + h = nextTry(h, high(t.data)) result = -1 - h # < 0 => MISSING; insert idx = -1 - result template ctget(t, key, default: untyped): untyped = diff --git a/tests/arc/trepr.nim b/tests/arc/trepr.nim index 391622ff6d04..7a92112eda25 100644 --- a/tests/arc/trepr.nim +++ b/tests/arc/trepr.nim @@ -1,7 +1,7 @@ discard """ cmd: "nim c --gc:arc $file" nimout: '''(a: true, n: doAssert) -Table[system.string, trepr.MyType](data: @[], counter: 0, countDeleted: 0) +Table[system.string, trepr.MyType](data: @[], counter: 0) nil ''' """ From 0433f4bec04eff183426be60a4361dd932ff5ebe Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Tue, 31 Mar 2020 10:10:16 -0400 Subject: [PATCH 2/2] Fix `data.len` -> `dataLen` problem. 
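
The predicate being fixed here is easy to sanity-check in isolation. Below
is a minimal, self-contained sketch (not part of this diff; `mustRehash` is
flattened to take plain ints rather than a table) of the
`rightSize`/`mustRehash` synchronization that the comments in PATCH 1/2
ask for:

```nim
import math # nextPowerOfTwo

proc rightSize(count: Natural): int {.inline.} =
  nextPowerOfTwo(count * 3 div 2 + 4)

proc mustRehash(dataLen, counter: int): bool {.inline.} =
  assert dataLen > counter
  (dataLen * 2 < counter * 3) or (dataLen - counter < 4)

when isMainModule:
  # a table sized by rightSize(n) for n items must never demand an
  # immediate rehash
  for n in 0 .. 10_000:
    doAssert not mustRehash(rightSize(n), n)
```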
--- lib/pure/collections/hashcommon.nim | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 851b6cae94e4..e998145e7202 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -30,8 +30,8 @@ proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = (h + 1) and maxHash proc mustRehash[T](t: T): bool {.inline.} = - assert(t.data.len > t.counter) - result = (t.data.len * 2 < t.counter * 3) or (t.data.len - t.counter < 4) + assert(t.dataLen > t.counter) + result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4) proc rightSize*(count: Natural): int {.inline.} = ## Return the value of ``initialSize`` to support ``count`` items.
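
A postscript sketch, not part of either patch: a self-contained toy version
of the two ideas this series restores, plain linear probing and the Knuth
Algorithm 6.4R backshift deletion that replaces tombstones. `Slot`, `put`,
and `delKey` are invented names, the "hash" is just the key itself, and the
table is assumed to never fill up:

```nim
type Slot = tuple[hcode: int, key: int] # hcode == 0 means EMPTY

proc nextTry(h, maxHash: int): int {.inline.} =
  (h + 1) and maxHash # the probe sequence this series restores

proc put(t: var seq[Slot], key: int) =
  let msk = t.len - 1
  var hc = key
  if hc == 0: hc = 314159265 # keep 0 reserved to mean EMPTY
  var h = hc and msk
  while t[h].hcode != 0:
    h = nextTry(h, msk)
  t[h] = (hc, key)

proc delKey(t: var seq[Slot], key: int) =
  let msk = t.len - 1
  var hc = key
  if hc == 0: hc = 314159265
  var i = hc and msk
  while t[i].hcode != 0 and t[i].key != key:
    i = nextTry(i, msk)
  if t[i].hcode == 0: return # key was absent
  while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
    var j = i # j is the hole being refilled
    var r = j
    t[i] = (0, 0) # mark slot i EMPTY
    while true:
      i = (i + 1) and msk # increment mod table size
      if t[i].hcode == 0: return # end of collision cluster; all done
      r = t[i].hcode and msk # "home" slot of the key at i
      # a key whose home r lies cyclically in (j, i] must stay where it is;
      # moving it into j would put it before the probe path that finds it
      if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
        break
    t[j] = move(t[i]) # slot i becomes the next hole

when isMainModule:
  var t = newSeq[Slot](8)
  for k in [1, 9, 17, 2]: t.put k # 1, 9, and 17 all collide at home slot 1
  t.delKey 9
  for k in [1, 17, 2]: # the survivors must remain reachable
    var h = k and (t.len - 1)
    while t[h].key != k:
      h = nextTry(h, t.len - 1)
    doAssert t[h].hcode != 0
```

Deleting 9 backshifts 17 and 2 toward their home slots, so lookups keep
working without any `deletedMarker` tombstones and without the table aging
that tombstones cause under delete-heavy workloads.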