-
-
Notifications
You must be signed in to change notification settings - Fork 180
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
263 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package bitset | ||
|
||
// From Wikipedia: http://en.wikipedia.org/wiki/Hamming_weight | ||
// Bit masks for the SWAR ("SIMD within a register") population count,
// taken from http://en.wikipedia.org/wiki/Hamming_weight.
const (
	m1  uint64 = 0x5555555555555555 // 0101...: low bit of every 2-bit field
	m2  uint64 = 0x3333333333333333 // 00110011...: low 2 bits of every 4-bit field
	m4  uint64 = 0x0f0f0f0f0f0f0f0f // 4 zeros, 4 ones, repeated
	m8  uint64 = 0x00ff00ff00ff00ff // 8 zeros, 8 ones, repeated
	m16 uint64 = 0x0000ffff0000ffff // 16 zeros, 16 ones, repeated
	m32 uint64 = 0x00000000ffffffff // 32 zeros, 32 ones
	hff uint64 = 0xffffffffffffffff // all ones
	h01 uint64 = 0x0101010101010101 // sum of 256^0, 256^1, 256^2, ...: a 1 in every byte
)
|
||
// From Wikipedia: count number of set bits. | ||
// This is algorithm popcount_2 in the article retrieved May 9, 2011 | ||
|
||
func popcount_2(x uint64) uint64 { | ||
x -= (x >> 1) & m1 //put count of each 2 bits into those 2 bits | ||
x = (x & m2) + ((x >> 2) & m2) //put count of each 4 bits into those 4 bits | ||
x = (x + (x >> 4)) & m4 //put count of each 8 bits into those 8 bits | ||
x += x >> 8 //put count of each 16 bits into their lowest 8 bits | ||
x += x >> 16 //put count of each 32 bits into their lowest 8 bits | ||
x += x >> 32 //put count of each 64 bits into their lowest 8 bits | ||
return x & 0x7f | ||
} | ||
|
||
func popcntSliceGo(s []uint64) uint64 { | ||
cnt := uint64(0) | ||
for _, x := range s { | ||
cnt += popcount_2(x) | ||
} | ||
return cnt | ||
} | ||
|
||
func popcntMaskSliceGo(s, m []uint64) uint64 { | ||
cnt := uint64(0) | ||
for i := range s { | ||
cnt += popcount_2(s[i] &^ m[i]) | ||
} | ||
return cnt | ||
} | ||
|
||
func popcntAndSliceGo(s, m []uint64) uint64 { | ||
cnt := uint64(0) | ||
for i := range s { | ||
cnt += popcount_2(s[i] & m[i]) | ||
} | ||
return cnt | ||
} | ||
|
||
func popcntOrSliceGo(s, m []uint64) uint64 { | ||
cnt := uint64(0) | ||
for i := range s { | ||
cnt += popcount_2(s[i] | m[i]) | ||
} | ||
return cnt | ||
} | ||
|
||
func popcntXorSliceGo(s, m []uint64) uint64 { | ||
cnt := uint64(0) | ||
for i := range s { | ||
cnt += popcount_2(s[i] ^ m[i]) | ||
} | ||
return cnt | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
// func hasAsm() bool
//
// Executes CPUID with AX=1 and returns bit 23 of CX, which Intel documents as
// the POPCNT feature flag for that leaf.
//
// The frame is declared as $0-1: no locals and a single one-byte bool result,
// so that the argument size matches the Go declaration (checked by go vet).
// Flag 4 is NOSPLIT.
TEXT ·hasAsm(SB),4,$0-1
	MOVQ	$1, AX          // select CPUID leaf 1 (feature information)
	CPUID
	SHRQ	$23, CX         // move feature bit 23 down to bit 0
	ANDQ	$1, CX          // isolate it so the stored byte is exactly 0 or 1
	MOVB	CX, ret+0(FP)
	RET
|
||
|
||
// POPCNTQ_DX_DX emits the raw instruction bytes F3 48 0F B8 D2, the encoding
// of POPCNTQ DX, DX (replace DX with the count of its set bits).
// NOTE(review): presumably hand-encoded because the assembler in use did not
// know the POPCNTQ mnemonic — confirm before replacing with the mnemonic.
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2 | ||
|
||
// func popcntSliceAsm(s []uint64) uint64
//
// Returns the sum of POPCNT over every 64-bit word of s.
// Argument frame (32 bytes): s_base+0, s_len+8, s_cap+16, ret+24.
// The slice components are referenced by their vet-recognised names
// (s_base / s_len) so that go vet's asmdecl check accepts the offsets.
TEXT ·popcntSliceAsm(SB),4,$0-32
	XORQ	AX, AX                  // AX = running total
	MOVQ	s_base+0(FP), SI        // SI = address of the current word
	MOVQ	s_len+8(FP), CX         // CX = number of words left (LOOP counter)
	TESTQ	CX, CX
	JZ	popcntSliceEnd          // LOOP decrements before testing, so a zero count must be skipped
popcntSliceLoop:
	BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
	ADDQ	DX, AX
	ADDQ	$8, SI                  // advance to the next 8-byte word
	LOOP	popcntSliceLoop         // CX--; repeat while CX != 0
popcntSliceEnd:
	MOVQ	AX, ret+24(FP)
	RET
|
||
// func popcntMaskSliceAsm(s, m []uint64) uint64
//
// Returns the sum of POPCNT(s[i] &^ m[i]) for i in [0, len(s)).
// Argument frame (56 bytes): s_base+0, s_len+8, s_cap+16,
// m_base+24, m_len+32, m_cap+40, ret+48.
// Only len(s) is consulted: len(s) words are read from m without any bounds
// check, so the caller must guarantee len(m) >= len(s).
// The slice components are referenced by their vet-recognised names so that
// go vet's asmdecl check accepts the offsets.
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
	XORQ	AX, AX                  // AX = running total
	MOVQ	s_base+0(FP), SI        // SI = address of the current word of s
	MOVQ	s_len+8(FP), CX         // CX = number of words left (LOOP counter)
	TESTQ	CX, CX
	JZ	popcntMaskSliceEnd      // LOOP decrements before testing, so a zero count must be skipped
	MOVQ	m_base+24(FP), DI       // DI = address of the current word of m
popcntMaskSliceLoop:
	MOVQ	(DI), DX
	NOTQ	DX                      // DX = ^m[i]
	ANDQ	(SI), DX                // DX = s[i] &^ m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	LOOP	popcntMaskSliceLoop     // CX--; repeat while CX != 0
popcntMaskSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET
|
||
// func popcntAndSliceAsm(s, m []uint64) uint64
//
// Returns the sum of POPCNT(s[i] & m[i]) for i in [0, len(s)).
// Argument frame (56 bytes): s_base+0, s_len+8, s_cap+16,
// m_base+24, m_len+32, m_cap+40, ret+48.
// Only len(s) is consulted: len(s) words are read from m without any bounds
// check, so the caller must guarantee len(m) >= len(s).
// The slice components are referenced by their vet-recognised names so that
// go vet's asmdecl check accepts the offsets.
TEXT ·popcntAndSliceAsm(SB),4,$0-56
	XORQ	AX, AX                  // AX = running total
	MOVQ	s_base+0(FP), SI        // SI = address of the current word of s
	MOVQ	s_len+8(FP), CX         // CX = number of words left (LOOP counter)
	TESTQ	CX, CX
	JZ	popcntAndSliceEnd       // LOOP decrements before testing, so a zero count must be skipped
	MOVQ	m_base+24(FP), DI       // DI = address of the current word of m
popcntAndSliceLoop:
	MOVQ	(DI), DX
	ANDQ	(SI), DX                // DX = s[i] & m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	LOOP	popcntAndSliceLoop      // CX--; repeat while CX != 0
popcntAndSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET
|
||
// func popcntOrSliceAsm(s, m []uint64) uint64
//
// Returns the sum of POPCNT(s[i] | m[i]) for i in [0, len(s)).
// Argument frame (56 bytes): s_base+0, s_len+8, s_cap+16,
// m_base+24, m_len+32, m_cap+40, ret+48.
// Only len(s) is consulted: len(s) words are read from m without any bounds
// check, so the caller must guarantee len(m) >= len(s).
// The slice components are referenced by their vet-recognised names so that
// go vet's asmdecl check accepts the offsets.
TEXT ·popcntOrSliceAsm(SB),4,$0-56
	XORQ	AX, AX                  // AX = running total
	MOVQ	s_base+0(FP), SI        // SI = address of the current word of s
	MOVQ	s_len+8(FP), CX         // CX = number of words left (LOOP counter)
	TESTQ	CX, CX
	JZ	popcntOrSliceEnd        // LOOP decrements before testing, so a zero count must be skipped
	MOVQ	m_base+24(FP), DI       // DI = address of the current word of m
popcntOrSliceLoop:
	MOVQ	(DI), DX
	ORQ	(SI), DX                // DX = s[i] | m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	LOOP	popcntOrSliceLoop       // CX--; repeat while CX != 0
popcntOrSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET
|
||
// func popcntXorSliceAsm(s, m []uint64) uint64
//
// Returns the sum of POPCNT(s[i] ^ m[i]) for i in [0, len(s)).
// Argument frame (56 bytes): s_base+0, s_len+8, s_cap+16,
// m_base+24, m_len+32, m_cap+40, ret+48.
// Only len(s) is consulted: len(s) words are read from m without any bounds
// check, so the caller must guarantee len(m) >= len(s).
// The slice components are referenced by their vet-recognised names so that
// go vet's asmdecl check accepts the offsets.
TEXT ·popcntXorSliceAsm(SB),4,$0-56
	XORQ	AX, AX                  // AX = running total
	MOVQ	s_base+0(FP), SI        // SI = address of the current word of s
	MOVQ	s_len+8(FP), CX         // CX = number of words left (LOOP counter)
	TESTQ	CX, CX
	JZ	popcntXorSliceEnd       // LOOP decrements before testing, so a zero count must be skipped
	MOVQ	m_base+24(FP), DI       // DI = address of the current word of m
popcntXorSliceLoop:
	MOVQ	(DI), DX
	XORQ	(SI), DX                // DX = s[i] ^ m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	LOOP	popcntXorSliceLoop      // CX--; repeat while CX != 0
popcntXorSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// +build amd64 | ||
|
||
package bitset | ||
|
||
// hasAsm is declared without a body; its implementation is the assembly
// routine of the same name, which runs CPUID with AX=1 and returns bit 23 of
// CX. Its result initialises useAsm.
//go:noescape | ||
|
||
func hasAsm() bool | ||
|
||
// useAsm holds the result of hasAsm, evaluated once at package
// initialisation. The popcnt*Slice wrappers in this file read it to choose
// between the assembly routines and the pure-Go fallbacks.
var useAsm = hasAsm() | ||
|
||
// popcntSliceAsm is declared without a body; it is implemented in assembly.
// It returns the total number of set bits in s. popcntSlice only calls it
// when useAsm is true.
//go:noescape | ||
|
||
func popcntSliceAsm(s []uint64) uint64 | ||
|
||
// popcntMaskSliceAsm is declared without a body; it is implemented in
// assembly. It returns the number of set bits in s[i] &^ m[i] summed over the
// indices of s. The assembly reads len(s) words from m without checking
// len(m), so m must be at least as long as s.
//go:noescape | ||
|
||
func popcntMaskSliceAsm(s, m []uint64) uint64 | ||
|
||
// popcntAndSliceAsm is declared without a body; it is implemented in
// assembly. It returns the number of set bits in s[i] & m[i] summed over the
// indices of s. The assembly reads len(s) words from m without checking
// len(m), so m must be at least as long as s.
//go:noescape | ||
|
||
func popcntAndSliceAsm(s, m []uint64) uint64 | ||
|
||
// popcntOrSliceAsm is declared without a body; it is implemented in
// assembly. It returns the number of set bits in s[i] | m[i] summed over the
// indices of s. The assembly reads len(s) words from m without checking
// len(m), so m must be at least as long as s.
//go:noescape | ||
|
||
func popcntOrSliceAsm(s, m []uint64) uint64 | ||
|
||
// popcntXorSliceAsm is declared without a body; it is implemented in
// assembly. It returns the number of set bits in s[i] ^ m[i] summed over the
// indices of s. The assembly reads len(s) words from m without checking
// len(m), so m must be at least as long as s.
//go:noescape | ||
|
||
func popcntXorSliceAsm(s, m []uint64) uint64 | ||
|
||
func popcntSlice(s []uint64) uint64 { | ||
if useAsm { | ||
return popcntSliceAsm(s) | ||
} | ||
return popcntSliceGo(s) | ||
} | ||
|
||
func popcntMaskSlice(s, m []uint64) uint64 { | ||
if useAsm { | ||
return popcntMaskSliceAsm(s, m) | ||
} | ||
return popcntMaskSliceGo(s, m) | ||
} | ||
|
||
func popcntAndSlice(s, m []uint64) uint64 { | ||
if useAsm { | ||
return popcntAndSliceAsm(s, m) | ||
} | ||
return popcntAndSliceGo(s, m) | ||
} | ||
|
||
func popcntOrSlice(s, m []uint64) uint64 { | ||
if useAsm { | ||
return popcntOrSliceAsm(s, m) | ||
} | ||
return popcntOrSliceGo(s, m) | ||
} | ||
|
||
func popcntXorSlice(s, m []uint64) uint64 { | ||
if useAsm { | ||
return popcntXorSliceAsm(s, m) | ||
} | ||
return popcntXorSliceGo(s, m) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// +build !amd64 | ||
|
||
package bitset | ||
|
||
func popcntSlice(s []uint64) uint64 { | ||
return popcntSliceGo(s) | ||
} | ||
|
||
func popcntMaskSlice(s, m []uint64) uint64 { | ||
return popcntMaskSliceGo(s, m) | ||
} | ||
|
||
func popcntAndSlice(s, m []uint64) uint64 { | ||
return popcntAndSliceGo(s, m) | ||
} | ||
|
||
func popcntOrSlice(s, m []uint64) uint64 { | ||
return popcntOrSliceGo(s, m) | ||
} | ||
|
||
func popcntXorSlice(s, m []uint64) uint64 { | ||
return popcntSliceGo(s, m) | ||
} |