-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize the "Hash" and make it faster #10
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 h1:BS21ZUJ/B5X2UVUbczfmdWH7GapPWAhxcMsDnjJTU1E= | ||
github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= | ||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= | ||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= | ||
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= | ||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= | ||
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= | ||
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= | ||
github.com/zeebo/wyhash v0.0.2-0.20221101160656-7c89dcff99fa h1:icNAX4PdeSmRwz4fmsMSUmYSPz+agsgrEe67BFrLFBM= | ||
github.com/zeebo/wyhash v0.0.2-0.20221101160656-7c89dcff99fa/go.mod h1:Ti+OwfNtM5AZiYAL0kOPIfliqDP5c0VtOnnMAqzuuZk= | ||
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= | ||
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,24 +2,46 @@ package cuckoo | |
|
||
import ( | ||
"encoding/binary" | ||
"math/rand" | ||
"unsafe" | ||
|
||
metro "github.com/dgryski/go-metro" | ||
"github.com/zeebo/wyhash" | ||
"github.com/zeebo/xxh3" | ||
) | ||
|
||
var ( | ||
// plus 1 to prevent out of range. | ||
altHash = [maxFingerprint + 1]uint{} | ||
rng wyhash.SRNG | ||
) | ||
|
||
// randi returns either i1 or i2 randomly. | ||
func randi(i1, i2 uint) uint { | ||
if rand.Int31()%2 == 0 { | ||
// it's faster than mod, but the result is the almost same. | ||
if uint32(uint64(uint32(rng.Uint64()))*uint64(2)>>32) == 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This makes it much harder to read, is the optimization really worth it? Please do a separate PR/evaluation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After studying the code in rng.Uint64(), I see that there will be a speed up in particular on concurrent use. So moving to the new library looks good to me. But I don't like the obfuscated code here. Please revert it to rng.Uint64() % 2 == 0 In my opinion, readability is more important here than the small gain from converting and bit shifting. |
||
return i1 | ||
} | ||
return i2 | ||
} | ||
func getEndian() (endian binary.ByteOrder) { | ||
var i int = 0x1 | ||
bs := (*[int(unsafe.Sizeof(0))]byte)(unsafe.Pointer(&i)) | ||
if bs[0] == 0 { | ||
return binary.BigEndian | ||
} else { | ||
return binary.LittleEndian | ||
} | ||
} | ||
func init() { | ||
b := make([]byte, 2) | ||
endian := getEndian() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the advantage in deriving endianes? Hard-coding either big or little for all platforms should do the same afaik. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Undo, I don't see the benefit. |
||
for i := 0; i < maxFingerprint+1; i++ { | ||
endian.PutUint16(b, uint16(i)) | ||
altHash[i] = (uint(xxh3.Hash(b))) | ||
} | ||
} | ||
|
||
func getAltIndex(fp fingerprint, i uint, bucketIndexMask uint) uint { | ||
b := make([]byte, 2) | ||
binary.LittleEndian.PutUint16(b, uint16(fp)) | ||
hash := uint(metro.Hash64(b, 1337)) | ||
return (i ^ hash) & bucketIndexMask | ||
return (i ^ altHash[fp]) & bucketIndexMask | ||
} | ||
|
||
func getFingerprint(hash uint64) fingerprint { | ||
|
@@ -32,7 +54,7 @@ func getFingerprint(hash uint64) fingerprint { | |
|
||
// getIndexAndFingerprint returns the primary bucket index and fingerprint to be used | ||
func getIndexAndFingerprint(data []byte, bucketIndexMask uint) (uint, fingerprint) { | ||
hash := metro.Hash64(data, 1337) | ||
hash := xxh3.Hash(data) | ||
f := getFingerprint(hash) | ||
// Use least significant bits for deriving index. | ||
i1 := uint(hash) & bucketIndexMask | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comes with a memory penalty of 2^16 /1024 = 64kb.
Please do a separate PR/evaluation to see if it's worth.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I agree with your argument that in most cases, the filter itself will already be quite big, hence the additional 64 kb don't add much. Please send as separate pull request.