Skip to content

Commit

Permalink
Generate correct matrix for code-gen based on actual vector length (f…
Browse files Browse the repository at this point in the history
…or 256 bits and below)
  • Loading branch information
fwessels committed Aug 21, 2024
1 parent 5b12fc2 commit 7949410
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 21 deletions.
17 changes: 9 additions & 8 deletions galois.go
Original file line number Diff line number Diff line change
Expand Up @@ -910,30 +910,31 @@ func galExp(a byte, n int) byte {
return expTable[uint8(logResult)]
}

func genCodeGenMatrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []byte) []byte {
func genCodeGenMatrix(matrixRows [][]byte, inputs, inIdx, outputs, vectorLength int, dst []byte) []byte {
if !codeGen {
panic("codegen not enabled")
}
total := inputs * outputs

// Duplicated in+out
wantBytes := total * 32 * 2
wantBytes := total * vectorLength * 2
if cap(dst) < wantBytes {
dst = AllocAligned(1, wantBytes)[0]
} else {
dst = dst[:wantBytes]
}
for i, row := range matrixRows[:outputs] {
for j, idx := range row[inIdx : inIdx+inputs] {
dstIdx := (j*outputs + i) * 64
dstIdx := (j*outputs + i) * vectorLength * 2
dstPart := dst[dstIdx:]
dstPart = dstPart[:64]
dstPart = dstPart[:vectorLength*2]
lo := mulTableLow[idx][:]
hi := mulTableHigh[idx][:]
copy(dstPart[:16], lo)
copy(dstPart[16:32], lo)
copy(dstPart[32:48], hi)
copy(dstPart[48:64], hi)

for k := 0; k < vectorLength; k += 16 {
copy(dstPart[k:k+16], lo)
copy(dstPart[vectorLength*2-(k+16):vectorLength*2-k], hi)
}
}
}
return dst
Expand Down
8 changes: 6 additions & 2 deletions galois_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@ func getVectorLength() (vl, pl uint64)

func init() {
if defaultOptions.useSVE {
if vl, _ := getVectorLength(); vl != 256 {
defaultOptions.useSVE = false // Temp fix: disable SVE for non-256 vector widths (ie Graviton4)
if vl, _ := getVectorLength(); vl <= 256 {
// set vector length in bytes
defaultOptions.vectorLength = int(vl) >> 3
} else {
// disable SVE for hardware implementations over 256 bits (only known to be Fujitsu A64FX atm)
defaultOptions.useSVE = false
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions galois_arm64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import (

func TestGenGalois(t *testing.T) {
if defaultOptions.useSVE {
testGenGaloisUpto10x10(t, galMulSlicesSve, galMulSlicesSveXor)
testGenGaloisUpto10x10(t, galMulSlicesSve, galMulSlicesSveXor, defaultOptions.vectorLength)
}
if defaultOptions.useNEON {
testGenGaloisUpto10x10(t, galMulSlicesNeon, galMulSlicesNeonXor)
testGenGaloisUpto10x10(t, galMulSlicesNeon, galMulSlicesNeonXor, 32)
}
}
18 changes: 9 additions & 9 deletions galois_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func TestSliceGalAdd(t *testing.T) {
}
}

func testGenGalois(t *testing.T, matrixRows [][]byte, size, start, stop int, f func(matrix []byte, in, out [][]byte, start, stop int) int) {
func testGenGalois(t *testing.T, matrixRows [][]byte, size, start, stop int, f func(matrix []byte, in, out [][]byte, start, stop int) int, vectorLength int) {

// reference versions
galMulSliceRef := func(c byte, in, out []byte) {
Expand Down Expand Up @@ -270,7 +270,7 @@ func testGenGalois(t *testing.T, matrixRows [][]byte, size, start, stop int, f f
}
}

m := genCodeGenMatrix(matrixRows, len(inputs), 0, len(outputs), nil)
m := genCodeGenMatrix(matrixRows, len(inputs), 0, len(outputs), vectorLength, nil)

end := start + f(m, inputs, outputs, start, stop)
if end != stop {
Expand All @@ -297,7 +297,7 @@ func testGenGalois(t *testing.T, matrixRows [][]byte, size, start, stop int, f f
}
}

func testGenGaloisXor(t *testing.T, matrixRows [][]byte, size, start, stop int, f func(matrix []byte, in, out [][]byte, start, stop int) int) {
func testGenGaloisXor(t *testing.T, matrixRows [][]byte, size, start, stop int, f func(matrix []byte, in, out [][]byte, start, stop int) int, vectorLength int) {

// reference version
galMulSliceXorRef := func(c byte, in, out []byte) {
Expand Down Expand Up @@ -327,7 +327,7 @@ func testGenGaloisXor(t *testing.T, matrixRows [][]byte, size, start, stop int,
}
}

m := genCodeGenMatrix(matrixRows, len(inputs), 0, len(outputs), nil)
m := genCodeGenMatrix(matrixRows, len(inputs), 0, len(outputs), vectorLength, nil)

end := start + f(m, inputs, outputs, start, stop)
if end != stop {
Expand Down Expand Up @@ -363,7 +363,7 @@ func testGenGaloisEarlyAbort(t *testing.T, matrixRows [][]byte, size int, f func
}
}

func testGenGaloisUpto10x10(t *testing.T, f, fXor func(matrix []byte, in, out [][]byte, start, stop int) int) {
func testGenGaloisUpto10x10(t *testing.T, f, fXor func(matrix []byte, in, out [][]byte, start, stop int) int, vectorLength int) {

for output := 1; output <= codeGenMaxOutputs; output++ {
for input := 1; input <= codeGenMaxInputs; input++ {
Expand All @@ -386,15 +386,15 @@ func testGenGaloisUpto10x10(t *testing.T, f, fXor func(matrix []byte, in, out []
const limit = 1024
for ; size < limit; size += stepsize {
// test full range
testGenGalois(t, matrixRows, size, 0, size, f)
testGenGaloisXor(t, matrixRows, size, 0, size, fXor)
testGenGalois(t, matrixRows, size, 0, size, f, vectorLength)
testGenGaloisXor(t, matrixRows, size, 0, size, fXor, vectorLength)

if size >= stepsize*2 && size < limit-stepsize*2 {
start := stepsize
stop := size - start
// test partial range
testGenGalois(t, matrixRows, size, start, stop, f)
testGenGaloisXor(t, matrixRows, size, start, stop, fXor)
testGenGalois(t, matrixRows, size, start, stop, f, vectorLength)
testGenGaloisXor(t, matrixRows, size, start, stop, fXor, vectorLength)
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type options struct {
useSSE2,
useNEON,
useSVE bool
vectorLength int

useJerasureMatrix bool
usePAR1Matrix bool
Expand Down Expand Up @@ -55,6 +56,7 @@ var defaultOptions = options{
useAvxGNFI: cpuid.CPU.Supports(cpuid.AVX, cpuid.GFNI),
useNEON: cpuid.CPU.Supports(cpuid.ASIMD),
useSVE: cpuid.CPU.Supports(cpuid.SVE),
vectorLength: 32, // default vector length is 32 bytes (256 bits) for AVX2 code gen
}

// leopardMode controls the use of leopard GF in encoding and decoding.
Expand Down

0 comments on commit 7949410

Please sign in to comment.