From 12417e7bfddde01cd0f353f2a9bcd79197bd35c7 Mon Sep 17 00:00:00 2001 From: Folkvir Date: Mon, 3 Jan 2022 14:09:39 +0100 Subject: [PATCH] move all hash related functions to the BaseFilter class --- src/base-filter.ts | 214 ++++++++++++++++++++- src/bloom/bloom-filter.ts | 18 +- src/bloom/counting-bloom-filter.ts | 23 ++- src/bloom/partitioned-bloom-filter.ts | 16 +- src/bloom/xor-filter.ts | 23 ++- src/cuckoo/cuckoo-filter.ts | 14 +- src/iblt/cell.ts | 4 +- src/iblt/invertible-bloom-lookup-tables.ts | 21 +- src/sketch/count-min-sketch.ts | 16 +- src/sketch/hyperloglog.ts | 4 +- src/utils.ts | 214 +-------------------- 11 files changed, 306 insertions(+), 261 deletions(-) diff --git a/src/base-filter.ts b/src/base-filter.ts index 56dbbda..babe6c8 100644 --- a/src/base-filter.ts +++ b/src/base-filter.ts @@ -24,8 +24,17 @@ SOFTWARE. 'use strict' -import * as utils from './utils' import seedrandom from 'seedrandom' +import XXH from 'xxhashjs' +import { + doubleHashing, + getDefaultSeed, + HashableInput, + numberToHex, + TwoHashes, + TwoHashesIntAndString, + TwoHashesTemplated, +} from './utils' /** * Exported prng type because it is not from seedrandom @@ -49,14 +58,14 @@ export default abstract class BaseFilter { private _rng: prng constructor() { - this._seed = utils.getDefaultSeed() + this._seed = getDefaultSeed() this._rng = seedrandom(`${this._seed}`) as prng } /** * Get the seed used in this structure */ - get seed(): number { + public get seed(): number { return this._seed } @@ -64,7 +73,7 @@ export default abstract class BaseFilter { * Set the seed for this structure * @param seed the new seed that will be used in this structure */ - set seed(seed: number) { + public set seed(seed: number) { this._seed = seed this._rng = seedrandom(`${this._seed}`) as prng } @@ -73,7 +82,7 @@ export default abstract class BaseFilter { * Get a function used to draw random number * @return A factory function used to draw random integer */ - get random(): prng { + public get random(): prng { return this._rng } @@ -81,14 +90,14 @@ export default abstract class BaseFilter { * Return a next random seeded int32 integer * @returns */ - nextInt32(): number { + public nextInt32(): number { return this._rng.int32() } /** * Save the current structure as a JSON object */ - saveAsJSON(): Object { + public saveAsJSON(): Object { throw new Error('not-implemented') } @@ -98,7 +107,196 @@ export default abstract class BaseFilter { * @return Return the Object loaded from the provided JSON object */ // eslint-disable-next-line @typescript-eslint/no-explicit-any - static fromJSON(json: JSON): any { + public static fromJSON(json: JSON): any { throw new Error(`not-implemented: ${json}`) } + + /** + * Generate a set of distinct indexes on interval [0, size) using the double hashing technique + * For generating efficiently distinct indexes we re-hash after detecting a cycle by changing slightly the seed. + * It has the effect of generating faster distinct indexes without loosing entirely the utility of the double hashing. + * For small number of indexes it will work perfectly. For a number close to the size, and size very large + * Advise: do not generate `size` indexes for a large interval. In practice, size should be equal + * to the number of hash functions used and is often low. + * + * @param element - The element to hash + * @param size - the range on which we can generate an index [0, size) = size + * @param number - The number of indexes desired + * @param seed - The seed used + * @return Array + * @author Arnaud Grall + * @author Simon Woolf (SimonWoolf) + */ + protected _getDistinctIndexes( + element: HashableInput, + size: number, + number: number, + seed?: number + ): Array { + if (seed === undefined) { + seed = getDefaultSeed() + } + let n = 0 + const indexes: Set = new Set() + let hashes = this._hashTwice(element, seed) + // let cycle = 0 + while (indexes.size < number) { + const ind = hashes.first % size + if (!indexes.has(ind)) { + indexes.add(ind) + } + hashes.first = (hashes.first + hashes.second) % size + hashes.second = (hashes.second + n) % size + n++ + + if (n > size) { + // Enhanced double hashing stops cycles of length less than `size` in the case where + // size is coprime with the second hash. But you still get cycles of length `size`. + // So if we reach there and haven't finished, append a prime to the input and + // rehash. + seed++ + hashes = this._hashTwice(element, seed) + } + } + return [...indexes.values()] + } + + /** + * Generate N indexes on range [0, size) + * It uses the double hashing technique to generate the indexes. + * It hash twice the value only once before generating the indexes. + * Warning: you can have a lot of modulo collisions. + * @param element - The element to hash + * @param size - The range on which we can generate the index, exclusive + * @param hashCount - The number of indexes we want + * @return An array of indexes on range [0, size) + */ + protected _getIndexes( + element: HashableInput, + size: number, + hashCount: number, + seed?: number + ): Array { + if (seed === undefined) { + seed = getDefaultSeed() + } + const arr = [] + const hashes = this._hashTwice(element, seed) + for (let i = 0; i < hashCount; i++) { + arr.push(doubleHashing(i, hashes.first, hashes.second, size)) + } + return arr + } + + /** + * @public + * @internal + * Hash an element of type {@link HashableInput} into {@link Number} + * Can be overrided as long as you return a value of type {@link Number} + * Don't forget to use the seed when hashing, otherwise if some kind of randomness is in the process + * you may have inconsistent behaviors between 2 runs. + * @param element + * @param seed + * @returns A 64bits floating point {@link Number} + */ + protected _serialize(element: HashableInput, seed?: number) { + if (!seed) { + seed = getDefaultSeed() + } + return Number(XXH.h64(element, seed).toNumber()) + } + + /** + * @private + * @internal + * (64-bits only) Hash a value into two values (in hex or integer format) + * @param value - The value to hash + * @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values. + * @param seed the seed used for hashing + * @return The results of the hash functions applied to the value (in hex or integer) + * @author Arnaud Grall & Thomas Minier + */ + protected _hashTwice(value: HashableInput, seed?: number): TwoHashes { + if (seed === undefined) { + seed = getDefaultSeed() + } + return { + first: this._serialize(value, seed + 1), + second: this._serialize(value, seed + 2), + } + } + + /** + * Hash twice an element into their HEX string representations + * @param value + * @param seed + * @returns TwoHashesTemplated + */ + protected _hashTwiceAsString( + value: HashableInput, + seed?: number + ): TwoHashesTemplated { + const {first, second} = this._hashTwice(value, seed) + return { + first: numberToHex(first), + second: numberToHex(second), + } + } + + /** + * (64-bits only) Same as hashTwice but return Numbers and String equivalent + * @param val the value to hash + * @param seed the seed to change when hashing + * @return TwoHashesIntAndString + * @author Arnaud Grall + */ + protected _hashTwiceIntAndString( + val: HashableInput, + seed?: number + ): TwoHashesIntAndString { + if (seed === undefined) { + seed = getDefaultSeed() + } + const one = this._hashIntAndString(val, seed + 1) + const two = this._hashIntAndString(val, seed + 2) + return { + int: { + first: one.int, + second: two.int, + }, + string: { + first: one.string, + second: two.string, + }, + } + } + + /** + * Hash an item as an unsigned int + * @param elem - Element to hash + * @param seed - The hash seed. If its is UINT32 make sure to set the length to 32 + * @param length - The length of hashes (defaults to 32 bits) + * @return The hash value as an unsigned int + * @author Arnaud Grall + */ + protected _hashAsInt(elem: HashableInput, seed?: number): number { + if (seed === undefined) { + seed = getDefaultSeed() + } + return this._serialize(elem, seed) + } + + /** + * Hash an item and return its number and HEX string representation + * @param elem - Element to hash + * @param seed - The hash seed. If its is UINT32 make sure to set the length to 32 + * @param base - The base in which the string will be returned, default: 16 + * @param length - The length of hashes (defaults to 32 bits) + * @return The item hased as an int and a string + * @author Arnaud Grall + */ + protected _hashIntAndString(elem: HashableInput, seed?: number) { + const hash = this._hashAsInt(elem, seed) + return {int: hash, string: numberToHex(hash)} + } } diff --git a/src/bloom/bloom-filter.ts b/src/bloom/bloom-filter.ts index 35f37c5..073dcca 100644 --- a/src/bloom/bloom-filter.ts +++ b/src/bloom/bloom-filter.ts @@ -29,7 +29,7 @@ import BaseFilter from '../base-filter' import BitSet from './bit-set' import {AutoExportable, Field, Parameter} from '../exportable' import {optimalFilterSize, optimalHashes} from '../formulas' -import {HashableInput, getIndexes} from '../utils' +import {HashableInput} from '../utils' /** * A Bloom filter is a space-efficient probabilistic data structure, conceived by Burton Howard Bloom in 1970, @@ -83,7 +83,7 @@ export default class BloomFilter static create(nbItems: number, errorRate: number): BloomFilter { const size = optimalFilterSize(nbItems, errorRate) const hashes = optimalHashes(size, nbItems) - return new BloomFilter(size, hashes) + return new this(size, hashes) } /** @@ -138,7 +138,12 @@ export default class BloomFilter * ``` */ add(element: HashableInput): void { - const indexes = getIndexes(element, this._size, this._nbHashes, this.seed) + const indexes = this._getIndexes( + element, + this._size, + this._nbHashes, + this.seed + ) for (let i = 0; i < indexes.length; i++) { this._filter.add(indexes[i]) } @@ -157,7 +162,12 @@ export default class BloomFilter * ``` */ has(element: HashableInput): boolean { - const indexes = getIndexes(element, this._size, this._nbHashes, this.seed) + const indexes = this._getIndexes( + element, + this._size, + this._nbHashes, + this.seed + ) for (let i = 0; i < indexes.length; i++) { if (!this._filter.has(indexes[i])) { return false diff --git a/src/bloom/counting-bloom-filter.ts b/src/bloom/counting-bloom-filter.ts index e98b72a..bffc261 100644 --- a/src/bloom/counting-bloom-filter.ts +++ b/src/bloom/counting-bloom-filter.ts @@ -28,7 +28,7 @@ import BaseFilter from '../base-filter' import WritableFilter from '../interfaces/writable-filter' import {AutoExportable, Field, Parameter} from '../exportable' import {optimalFilterSize, optimalHashes} from '../formulas' -import {HashableInput, allocateArray, getIndexes} from '../utils' +import {HashableInput, allocateArray} from '../utils' /** * A Counting Bloom filter works in a similar manner as a regular Bloom filter; however, it is able to keep track of insertions and deletions. In a counting Bloom filter, each entry in the Bloom filter is a small counter associated with a basic Bloom filter bit. @@ -128,7 +128,12 @@ export default class CountingBloomFilter * ``` */ add(element: HashableInput): void { - const indexes = getIndexes(element, this._size, this._nbHashes, this.seed) + const indexes = this._getIndexes( + element, + this._size, + this._nbHashes, + this.seed + ) for (let i = 0; i < indexes.length; i++) { // increment counter this._filter[indexes[i]][1] += 1 @@ -150,7 +155,12 @@ export default class CountingBloomFilter * ``` */ remove(element: HashableInput): boolean { - const indexes = getIndexes(element, this._size, this._nbHashes, this.seed) + const indexes = this._getIndexes( + element, + this._size, + this._nbHashes, + this.seed + ) const success = true for (let i = 0; i < indexes.length; i++) { // decrement counter @@ -177,7 +187,12 @@ export default class CountingBloomFilter * ``` */ has(element: HashableInput): boolean { - const indexes = getIndexes(element, this._size, this._nbHashes, this.seed) + const indexes = this._getIndexes( + element, + this._size, + this._nbHashes, + this.seed + ) for (let i = 0; i < indexes.length; i++) { if (!this._filter[indexes[i]][0]) { return false diff --git a/src/bloom/partitioned-bloom-filter.ts b/src/bloom/partitioned-bloom-filter.ts index 6f947ed..1d0ae42 100644 --- a/src/bloom/partitioned-bloom-filter.ts +++ b/src/bloom/partitioned-bloom-filter.ts @@ -27,7 +27,7 @@ SOFTWARE. import BaseFilter from '../base-filter' import ClassicFilter from '../interfaces/classic-filter' import {AutoExportable, Field, Parameter} from '../exportable' -import {HashableInput, allocateArray, getIndexes} from '../utils' +import {HashableInput, allocateArray} from '../utils' /** * Return the optimal number of hashes needed for a given error rate and load factor @@ -222,7 +222,12 @@ export default class PartitionedBloomFilter * ``` */ add(element: HashableInput): void { - const indexes = getIndexes(element, this._m, this._nbHashes, this.seed) + const indexes = this._getIndexes( + element, + this._m, + this._nbHashes, + this.seed + ) for (let i = 0; i < this._nbHashes; i++) { this._filter[i][indexes[i]] = 1 } @@ -242,7 +247,12 @@ export default class PartitionedBloomFilter * ``` */ has(element: HashableInput): boolean { - const indexes = getIndexes(element, this._m, this._nbHashes, this.seed) + const indexes = this._getIndexes( + element, + this._m, + this._nbHashes, + this.seed + ) for (let i = 0; i < this._nbHashes; i++) { if (!this._filter[i][indexes[i]]) { return false diff --git a/src/bloom/xor-filter.ts b/src/bloom/xor-filter.ts index 47f531d..37aa8a1 100644 --- a/src/bloom/xor-filter.ts +++ b/src/bloom/xor-filter.ts @@ -26,7 +26,7 @@ SOFTWARE. import BaseFilter from '../base-filter' import {AutoExportable, Field, Parameter} from '../exportable' -import {HashableInput, allocateArray} from '../utils' +import {HashableInput, allocateArray, BufferError} from '../utils' import XXH from 'xxhashjs' import Long from 'long' import {encode, decode} from 'base64-arraybuffer' @@ -35,6 +35,11 @@ const CONSTANTS = new Map() CONSTANTS.set(8, 0xff) CONSTANTS.set(16, 0xffff) +/** + * Extended HashableInput type adding the Long type from the long package for using plain 64-bits number. + */ +export type XorHashableInput = HashableInput | Long + /** * XOR-Filter for 8-bits and 16-bits fingerprint length. * @@ -98,6 +103,10 @@ export default class XorFilter extends BaseFilter { @Parameter('_bits') bits_per_fingerprint?: 8 | 16 ) { super() + // try to use the Buffer class or reject by throwing an error + if (!Buffer) { + throw new Error(BufferError) + } if (bits_per_fingerprint) { if (!this.ALLOWED_FINGERPRINT_SIZES.includes(bits_per_fingerprint)) { throw new Error( @@ -124,9 +133,11 @@ export default class XorFilter extends BaseFilter { * @param element * @returns */ - public has(element: HashableInput): boolean { + public has(element: XorHashableInput): boolean { const hash = this._hash64( - this._hashable_to_long(element, this.seed), + element instanceof Long + ? element + : this._hashable_to_long(element, this.seed), this.seed ) const fingerprint = this._fingerprint(hash).toInt() @@ -155,7 +166,7 @@ export default class XorFilter extends BaseFilter { * xor.has('bob') // false * ``` */ - add(elements: HashableInput[]) { + add(elements: XorHashableInput[]) { if (elements.length !== this._size) { throw new Error( `This filter has been created for exactly ${this._size} elements` @@ -202,7 +213,7 @@ export default class XorFilter extends BaseFilter { * @returns */ public static create( - elements: HashableInput[], + elements: XorHashableInput[], bits_per_fingerprint?: 8 | 16 ): XorFilter { const a = new XorFilter(elements.length, bits_per_fingerprint) @@ -338,7 +349,7 @@ export default class XorFilter extends BaseFilter { * @param arraylength length of the filter * @returns */ - private _create(elements: HashableInput[], arrayLength: number) { + private _create(elements: XorHashableInput[], arrayLength: number) { const reverseOrder: Long[] = allocateArray(this._size, Long.ZERO) const reverseH: number[] = allocateArray(this._size, 0) let reverseOrderPos diff --git a/src/cuckoo/cuckoo-filter.ts b/src/cuckoo/cuckoo-filter.ts index bbfef3e..d927d40 100644 --- a/src/cuckoo/cuckoo-filter.ts +++ b/src/cuckoo/cuckoo-filter.ts @@ -28,13 +28,7 @@ import WritableFilter from '../interfaces/writable-filter' import BaseFilter from '../base-filter' import Bucket from './bucket' import {Exportable, cloneObject} from '../exportable' -import { - HashableInput, - allocateArray, - hashAsInt, - hashIntAndString, - randomInt, -} from '../utils' +import {HashableInput, allocateArray, randomInt} from '../utils' /** * Compute the optimal fingerprint length in bytes for a given bucket size @@ -264,7 +258,7 @@ export default class CuckooFilter this._filter[index].set(rndIndex, movedElement) movedElement = tmp! // movedElement = this._filter[index].set(rndswapRandom(movedElement, this._rng) - const newHash = hashAsInt(movedElement!, this.seed) + const newHash = this._hashAsInt(movedElement!, this.seed) index = Math.abs(index ^ Math.abs(newHash)) % this._filter.length // add the moved element to the bucket if possible if (this._filter[index].isFree()) { @@ -375,7 +369,7 @@ export default class CuckooFilter * @private */ _locations(element: HashableInput) { - const hashes = hashIntAndString(element, this.seed) + const hashes = this._hashIntAndString(element, this.seed) const hash = hashes.int if (this._fingerprintLength > hashes.string.length) { throw new Error( @@ -384,7 +378,7 @@ export default class CuckooFilter } const fingerprint = hashes.string.substring(0, this._fingerprintLength) const firstIndex = Math.abs(hash) - const secondHash = Math.abs(hashAsInt(fingerprint, this.seed)) + const secondHash = Math.abs(this._hashAsInt(fingerprint, this.seed)) const secondIndex = Math.abs(firstIndex ^ secondHash) const res = { fingerprint, diff --git a/src/iblt/cell.ts b/src/iblt/cell.ts index bd9a99a..d4e7b9e 100644 --- a/src/iblt/cell.ts +++ b/src/iblt/cell.ts @@ -24,7 +24,7 @@ SOFTWARE. 'use strict' -import {hashTwiceAsString, xorBuffer} from '../utils' +import {xorBuffer} from '../utils' import {AutoExportable, Field, Parameter} from '../exportable' import BaseFilter from '../base-filter' @@ -166,7 +166,7 @@ export default class Cell extends BaseFilter { return false } // compare the hashes - const hashes = hashTwiceAsString( + const hashes = this._hashTwiceAsString( JSON.stringify(this._idSum.toJSON()), this.seed ) diff --git a/src/iblt/invertible-bloom-lookup-tables.ts b/src/iblt/invertible-bloom-lookup-tables.ts index d95e165..378269e 100644 --- a/src/iblt/invertible-bloom-lookup-tables.ts +++ b/src/iblt/invertible-bloom-lookup-tables.ts @@ -28,7 +28,7 @@ import BaseFilter from '../base-filter' import WritableFilter from '../interfaces/writable-filter' import Cell from './cell' import {AutoExportable, Field, Parameter} from '../exportable' -import {allocateArray, getDistinctIndexes, hashTwiceAsString} from '../utils' +import {allocateArray} from '../utils' import {optimalFilterSize, optimalHashes} from '../formulas' /** @@ -169,11 +169,11 @@ export default class InvertibleBloomFilter * @param element - The element to insert */ add(element: Buffer): void { - const hashes = hashTwiceAsString( + const hashes = this._hashTwiceAsString( JSON.stringify(element.toJSON()), this.seed ) - const indexes = getDistinctIndexes( + const indexes = this._getDistinctIndexes( hashes.first, this._size, this._hashCount, @@ -190,11 +190,11 @@ export default class InvertibleBloomFilter * @return True if the element has been removed, False otheriwse */ remove(element: Buffer): boolean { - const hashes = hashTwiceAsString( + const hashes = this._hashTwiceAsString( JSON.stringify(element.toJSON()), this.seed ) - const indexes = getDistinctIndexes( + const indexes = this._getDistinctIndexes( hashes.first, this._size, this._hashCount, @@ -214,11 +214,11 @@ export default class InvertibleBloomFilter * @return False if the element is not in the filter, true if "may be" in the filter. */ has(element: Buffer): boolean { - const hashes = hashTwiceAsString( + const hashes = this._hashTwiceAsString( JSON.stringify(element.toJSON()), this.seed ) - const indexes = getDistinctIndexes( + const indexes = this._getDistinctIndexes( hashes.first, this._size, this._hashCount, @@ -337,8 +337,11 @@ export default class InvertibleBloomFilter } else { throw new Error('Please report, not possible') } - const hashes = hashTwiceAsString(JSON.stringify(id.toJSON()), this.seed) - const indexes = getDistinctIndexes( + const hashes = this._hashTwiceAsString( + JSON.stringify(id.toJSON()), + this.seed + ) + const indexes = this._getDistinctIndexes( hashes.first, this._size, this._hashCount, diff --git a/src/sketch/count-min-sketch.ts b/src/sketch/count-min-sketch.ts index dc69f3b..e46bdf6 100644 --- a/src/sketch/count-min-sketch.ts +++ b/src/sketch/count-min-sketch.ts @@ -27,7 +27,7 @@ SOFTWARE. import BaseFilter from '../base-filter' import CountingFilter from '../interfaces/counting-filter' import {AutoExportable, Field, Parameter} from '../exportable' -import {allocateArray, getIndexes, HashableInput} from '../utils' +import {allocateArray, HashableInput} from '../utils' /** * The count–min sketch (CM sketch) is a probabilistic data structure that serves as a frequency table of events in a stream of data. @@ -130,7 +130,12 @@ export default class CountMinSketch */ update(element: HashableInput, count = 1): void { this._allSums += count - const indexes = getIndexes(element, this._columns, this._rows, this.seed) + const indexes = this._getIndexes( + element, + this._columns, + this._rows, + this.seed + ) for (let i = 0; i < this._rows; i++) { this._matrix[i][indexes[i]] += count } @@ -143,7 +148,12 @@ export default class CountMinSketch */ count(element: HashableInput): number { let min = Infinity - const indexes = getIndexes(element, this._columns, this._rows, this.seed) + const indexes = this._getIndexes( + element, + this._columns, + this._rows, + this.seed + ) for (let i = 0; i < this._rows; i++) { const v = this._matrix[i][indexes[i]] min = Math.min(v, min) diff --git a/src/sketch/hyperloglog.ts b/src/sketch/hyperloglog.ts index e1d4793..cfec819 100644 --- a/src/sketch/hyperloglog.ts +++ b/src/sketch/hyperloglog.ts @@ -24,7 +24,7 @@ SOFTWARE. import BaseFilter from '../base-filter' import {AutoExportable, Field, Parameter} from '../exportable' -import {HashableInput, allocateArray, hashAsInt} from '../utils' +import {HashableInput, allocateArray} from '../utils' // 2^32, computed as a constant as we use it a lot in the HyperLogLog algorithm const TWO_POW_32 = Math.pow(2, 32) @@ -104,7 +104,7 @@ export default class HyperLogLog extends BaseFilter { */ update(element: HashableInput): void { // const hashedValue = Buffer.from(hashAsString(element, this.seed)) - const hashedValue = hashAsInt(element, this.seed).toString(2) + const hashedValue = this._hashAsInt(element, this.seed).toString(2) const registerIndex = 1 + parseInt(hashedValue.slice(0, this._nbBytesPerHash - 1), 2) // find the left most 1-bit in the second part of the buffer diff --git a/src/utils.ts b/src/utils.ts index f561bb0..c6767e9 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -24,8 +24,6 @@ SOFTWARE. 'use strict' -import XXH from 'xxhashjs' - /** * Utilitaries functions * @namespace Utils @@ -66,44 +64,10 @@ export interface TwoHashesIntAndString { export type HashableInput = string | ArrayBuffer | Buffer /** - * @internal - * Internal variable for switching XXH hash function from/to 32/64 bits type. - * Can be overrided as long as you respect the XXH.HashInterface type. - * Only .toNumber() is used in the package. see {@link serialize} - */ -let serialize_function: XXH.HashInterface = XXH.h64 - -/** - * Allow to switch the hash function between XXH.h32 or XXH.h64 bits ({@link serialize_function}) - * @param base 32 or 64 by default - * @returns + * BufferError */ -export function switchSerializationType(base = 64) { - switch (base) { - case 32: - serialize_function = XXH.h32 - break - case 64: - default: - serialize_function = XXH.h64 - } -} - -/** - * Hash an element of type {@link HashableInput} into {@link Number} - * Can be overrided as long as you return a value of type {@link Number} - * Don't forget to use the seed when hashing, otherwise if some kind of randomness is in the process - * you may have inconsistent behaviors between 2 runs. - * @param element - * @param seed - * @returns A 64bits floating point {@link Number} - */ -export function serialize(element: HashableInput, seed?: number) { - if (!seed) { - seed = getDefaultSeed() - } - return Number(serialize_function(element, seed).toNumber()) -} +export const BufferError = + 'The buffer class must be available, if you are a browser user use the buffer package (https://www.npmjs.com/package/buffer)' /** * Create a new array fill with a base value @@ -132,7 +96,7 @@ export function allocateArray( * @param elem the element to transform in HEX * @returns the HEX number padded of zeroes */ -function numberToHex(elem: number): string { +export function numberToHex(elem: number): string { let e = Number(elem).toString(16) if (e.length % 4 !== 0) { e = '0'.repeat(4 - (e.length % 4)) + e @@ -140,99 +104,6 @@ function numberToHex(elem: number): string { return e } -/** - * (64-bits only) Hash a value into two values (in hex or integer format) - * @param value - The value to hash - * @param asInt - (optional) If True, the values will be returned as an integer. Otherwise, as hexadecimal values. - * @param seed the seed used for hashing - * @return The results of the hash functions applied to the value (in hex or integer) - * @memberof Utils - * @author Arnaud Grall & Thomas Minier - */ -export function hashTwice(value: HashableInput, seed?: number): TwoHashes { - if (seed === undefined) { - seed = getDefaultSeed() - } - return { - first: serialize(value, seed + 1), - second: serialize(value, seed + 2), - } -} - -/** - * Hash twice an element into their HEX string representations - * @param value - * @param seed - * @returns TwoHashesTemplated - */ -export function hashTwiceAsString( - value: HashableInput, - seed?: number -): TwoHashesTemplated { - const {first, second} = hashTwice(value, seed) - return { - first: numberToHex(first), - second: numberToHex(second), - } -} - -/** - * (64-bits only) Same as hashTwice but return Numbers and String equivalent - * @param val the value to hash - * @param seed the seed to change when hashing - * @return TwoHashesIntAndString - * @author Arnaud Grall - */ -export function HashTwiceIntAndString( - val: HashableInput, - seed?: number -): TwoHashesIntAndString { - if (seed === undefined) { - seed = getDefaultSeed() - } - const one = hashIntAndString(val, seed + 1) - const two = hashIntAndString(val, seed + 2) - return { - int: { - first: one.int, - second: two.int, - }, - string: { - first: one.string, - second: two.string, - }, - } -} - -/** - * Hash an item as an unsigned int - * @param elem - Element to hash - * @param seed - The hash seed. If its is UINT32 make sure to set the length to 32 - * @param length - The length of hashes (defaults to 32 bits) - * @return The hash value as an unsigned int - * @author Arnaud Grall - */ -export function hashAsInt(elem: HashableInput, seed?: number): number { - if (seed === undefined) { - seed = getDefaultSeed() - } - return serialize(elem, seed) -} - -/** - * Hash an item and return its number and HEX string representation - * @param elem - Element to hash - * @param seed - The hash seed. If its is UINT32 make sure to set the length to 32 - * @param base - The base in which the string will be returned, default: 16 - * @param length - The length of hashes (defaults to 32 bits) - * @return The item hased as an int and a string - * @author Arnaud Grall - */ -export function hashIntAndString(elem: HashableInput, seed?: number) { - const hash = hashAsInt(elem, seed) - return {int: hash, string: numberToHex(hash)} -} - /** * Apply enhanced Double Hashing to produce a n-hash * @see {@link http://peterd.org/pcd-diss.pdf} s6.5.4 @@ -254,83 +125,6 @@ export function doubleHashing( return Math.abs((hashA + n * hashB + Math.floor((n ** 3 - n) / 6)) % size) } -/** - * Generate a set of distinct indexes on interval [0, size) using the double hashing technique - * For generating efficiently distinct indexes we re-hash after detecting a cycle by changing slightly the seed. - * It has the effect of generating faster distinct indexes without loosing entirely the utility of the double hashing. - * For small number of indexes it will work perfectly. For a number close to the size, and size very large - * Advise: do not generate `size` indexes for a large interval. In practice, size should be equal - * to the number of hash functions used and is often low. - * - * @param element - The element to hash - * @param size - the range on which we can generate an index [0, size) = size - * @param number - The number of indexes desired - * @param seed - The seed used - * @return Array - * @author Arnaud Grall - * @author Simon Woolf (SimonWoolf) - */ -export function getDistinctIndexes( - element: HashableInput, - size: number, - number: number, - seed?: number -): Array { - if (seed === undefined) { - seed = getDefaultSeed() - } - let n = 0 - const indexes: Set = new Set() - let hashes = hashTwice(element, seed) - // let cycle = 0 - while (indexes.size < number) { - const ind = hashes.first % size - if (!indexes.has(ind)) { - indexes.add(ind) - } - hashes.first = (hashes.first + hashes.second) % size - hashes.second = (hashes.second + n) % size - n++ - - if (n > size) { - // Enhanced double hashing stops cycles of length less than `size` in the case where - // size is coprime with the second hash. But you still get cycles of length `size`. - // So if we reach there and haven't finished, append a prime to the input and - // rehash. - seed++ - hashes = hashTwice(element, seed) - } - } - return [...indexes.values()] -} - -/** - * Generate N indexes on range [0, size) - * It uses the double hashing technique to generate the indexes. - * It hash twice the value only once before generating the indexes. - * Warning: you can have a lot of modulo collisions. - * @param element - The element to hash - * @param size - The range on which we can generate the index, exclusive - * @param hashCount - The number of indexes we want - * @return An array of indexes on range [0, size) - */ -export function getIndexes( - element: HashableInput, - size: number, - hashCount: number, - seed?: number -): Array { - if (seed === undefined) { - seed = getDefaultSeed() - } - const arr = [] - const hashes = hashTwice(element, seed) - for (let i = 0; i < hashCount; i++) { - arr.push(doubleHashing(i, hashes.first, hashes.second, size)) - } - return arr -} - /** * Generate a random int between two bounds (included) * @param min - The lower bound