From f3b9bd174c45a6b9b54df8694e002e351bca7216 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 11 May 2025 18:13:20 -0400 Subject: [PATCH 1/7] Add failing test --- gfp_test.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gfp_test.go b/gfp_test.go index 54570b4..78c41f3 100644 --- a/gfp_test.go +++ b/gfp_test.go @@ -3,8 +3,11 @@ package bn256 import ( "crypto/rand" "encoding/binary" + "fmt" "io" "math/big" + "runtime" + "sync" "testing" ) @@ -112,6 +115,33 @@ func TestGFp(t *testing.T) { } }) + t.Run("mul_fp_corruption", func(t *testing.T) { + // By enabling the mutex profiling, the go runtime will traverse the + // stack to measure mutex operations when a mutex is unlocked when a + // goroutine is blocking on it. + runtime.SetMutexProfileFraction(1) + + var wg sync.WaitGroup + wg.Add(testTimes) + for i := 0; i < testTimes; i++ { + // If multiple goroutines interact with a global sync pool, the + // goroutines may block on acquiring a lock. When that happens, the + // mutex profiler will traverse the stack. + go func() { + defer wg.Done() + + a := togfP(randomGF(rand.Reader)) + b := togfP(randomGF(rand.Reader)) + c := &gfP{} + gfpMul(c, a, b) + + // Print ends up interacting with a global sync pool. + fmt.Print("") + }() + } + wg.Wait() + }) + t.Run("neg", func(t *testing.T) { c := &gfP{} bigC := new(big.Int) From d408651d731cb87a548d94a0773b035a709c4696 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 11 May 2025 19:33:46 -0400 Subject: [PATCH 2/7] Use c7 rather than R29 as much as possible --- mul_arm64.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mul_arm64.h b/mul_arm64.h index b43404b..6b25bda 100644 --- a/mul_arm64.h +++ b/mul_arm64.h @@ -19,15 +19,15 @@ UMULH R2, R6, R27 \ MUL R2, R7, R0 \ ADCS R0, R27 \ - UMULH R2, R7, R29 \ + UMULH R2, R7, c7 \ MUL R2, R8, R0 \ - ADCS R0, R29 \ + ADCS R0, c7 \ UMULH R2, R8, c5 \ ADCS ZR, c5 \ ADDS R1, c1 \ ADCS R26, c2 \ ADCS R27, c3 \ - ADCS R29, c4 \ + ADCS c7, c4 \ ADCS ZR, c5 \ \ MUL R3, R5, R1 \ @@ -37,15 +37,15 @@ UMULH R3, R6, R27 \ MUL R3, R7, R0 \ ADCS R0, R27 \ - UMULH R3, R7, R29 \ + UMULH R3, R7, c7 \ MUL R3, R8, R0 \ - ADCS R0, R29 \ + ADCS R0, c7 \ UMULH R3, R8, c6 \ ADCS ZR, c6 \ ADDS R1, c2 \ ADCS R26, c3 \ ADCS R27, c4 \ - ADCS R29, c5 \ + ADCS c7, c5 \ ADCS ZR, c6 \ \ MUL R4, R5, R1 \ From 8c3a708461e19f0f6b5ddaf8826466fee10f4d63 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 11 May 2025 19:35:34 -0400 Subject: [PATCH 3/7] Use c6 rather than R27 as much as possible --- mul_arm64.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mul_arm64.h b/mul_arm64.h index 6b25bda..5e1baf1 100644 --- a/mul_arm64.h +++ b/mul_arm64.h @@ -16,9 +16,9 @@ UMULH R2, R5, R26 \ MUL R2, R6, R0 \ ADDS R0, R26 \ - UMULH R2, R6, R27 \ + UMULH R2, R6, c6 \ MUL R2, R7, R0 \ - ADCS R0, R27 \ + ADCS R0, c6 \ UMULH R2, R7, c7 \ MUL R2, R8, R0 \ ADCS R0, c7 \ @@ -26,7 +26,7 @@ ADCS ZR, c5 \ ADDS R1, c1 \ ADCS R26, c2 \ - ADCS R27, c3 \ + ADCS c6, c3 \ ADCS c7, c4 \ ADCS ZR, c5 \ \ From 1d4b528d615f00687339325686813882e69c12d1 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 11 May 2025 19:39:03 -0400 Subject: [PATCH 4/7] Overwrite R6 rather than clobber R29 --- mul_arm64.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mul_arm64.h b/mul_arm64.h index 5e1baf1..1a35d3a 100644 --- a/mul_arm64.h +++ b/mul_arm64.h @@ -55,15 +55,15 @@ UMULH R4, R6, R27 \ MUL R4, R7, R0 \ ADCS R0, R27 \ - UMULH R4, R7, R29 \ + UMULH R4, R7, R6 \ MUL R4, R8, R0 \ - ADCS R0, R29 \ + ADCS R0, R6 \ UMULH R4, R8, c7 \ ADCS ZR, c7 \ ADDS R1, c3 \ ADCS R26, c4 \ ADCS R27, c5 \ - ADCS R29, c6 \ + ADCS R6, c6 \ ADCS ZR, c7 #define gfpReduce() \ @@ -121,6 +121,7 @@ ADCS R16, R24 \ ADCS ZR, R0 \ \ + MOVD ·p2+8(SB), R6 \ // Restore R6 \ // Our output is R21:R22:R23:R24. Reduce mod p if necessary. SUBS R5, R21, R10 \ SBCS R6, R22, R11 \ From 6cbf98d85e612317b7e28f7b45c4d5c8a9737094 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 11 May 2025 20:09:15 -0400 Subject: [PATCH 5/7] Overwrite R5 rather than clobber R27 in the simple case --- mul_arm64.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mul_arm64.h b/mul_arm64.h index 1a35d3a..d49a6d1 100644 --- a/mul_arm64.h +++ b/mul_arm64.h @@ -52,9 +52,9 @@ UMULH R4, R5, R26 \ MUL R4, R6, R0 \ ADDS R0, R26 \ - UMULH R4, R6, R27 \ + UMULH R4, R6, R5 \ MUL R4, R7, R0 \ - ADCS R0, R27 \ + ADCS R0, R5 \ UMULH R4, R7, R6 \ MUL R4, R8, R0 \ ADCS R0, R6 \ @@ -62,7 +62,7 @@ ADCS ZR, c7 \ ADDS R1, c3 \ ADCS R26, c4 \ - ADCS R27, c5 \ + ADCS R5, c5 \ ADCS R6, c6 \ ADCS ZR, c7 @@ -121,6 +121,7 @@ ADCS R16, R24 \ ADCS ZR, R0 \ \ + MOVD ·p2+0(SB), R5 \ // Restore R5 MOVD ·p2+8(SB), R6 \ // Restore R6 \ // Our output is R21:R22:R23:R24. Reduce mod p if necessary. SUBS R5, R21, R10 \ From 6a07d39415f313b12436d867f87540b9d673f4a2 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Sun, 11 May 2025 20:38:48 -0400 Subject: [PATCH 6/7] Remove last usage of R27 --- gfp_arm64.s | 2 +- mul_arm64.h | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/gfp_arm64.s b/gfp_arm64.s index c65e801..f1e8e21 100644 --- a/gfp_arm64.s +++ b/gfp_arm64.s @@ -105,7 +105,7 @@ TEXT ·gfpMul(SB),0,$0-24 MOVD b+16(FP), R0 loadBlock(0(R0), R5,R6,R7,R8) - mul(R9,R10,R11,R12,R13,R14,R15,R16) + mul(R9,R10,R11,R12,R13,R14,R15,R16,R17,EMPTY) gfpReduce() MOVD c+0(FP), R0 diff --git a/mul_arm64.h b/mul_arm64.h index d49a6d1..67adf6a 100644 --- a/mul_arm64.h +++ b/mul_arm64.h @@ -1,4 +1,9 @@ -#define mul(c0,c1,c2,c3,c4,c5,c6,c7) \ +#define EMPTY + +#define RELOAD \ + MOVD ·p2+0(SB), R5 + +#define mul(c0,c1,c2,c3,c4,c5,c6,c7,t0,reset) \ MUL R1, R5, c0 \ UMULH R1, R5, c1 \ MUL R1, R6, R0 \ @@ -34,9 +39,9 @@ UMULH R3, R5, R26 \ MUL R3, R6, R0 \ ADDS R0, R26 \ - UMULH R3, R6, R27 \ + UMULH R3, R6, t0 \ MUL R3, R7, R0 \ - ADCS R0, R27 \ + ADCS R0, t0 \ UMULH R3, R7, c7 \ MUL R3, R8, R0 \ ADCS R0, c7 \ @@ -44,10 +49,12 @@ ADCS ZR, c6 \ ADDS R1, c2 \ ADCS R26, c3 \ - ADCS R27, c4 \ + ADCS t0, c4 \ ADCS c7, c5 \ ADCS ZR, c6 \ \ + reset \ + \ MUL R4, R5, R1 \ UMULH R4, R5, R26 \ MUL R4, R6, R0 \ @@ -107,7 +114,7 @@ \ \ // m * N loadModulus(R5,R6,R7,R8) \ - mul(R17,R25,R19,R20,R21,R22,R23,R24) \ + mul(R17,R25,R19,R20,R21,R22,R23,R24,R5,RELOAD) \ \ \ // Add the 512-bit intermediate to m*N MOVD ZR, R0 \ From 20c6b86deeaae004f7fc3a894769040e81e65714 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 12 May 2025 10:36:35 -0400 Subject: [PATCH 7/7] Simplify regression test --- gfp_test.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/gfp_test.go b/gfp_test.go index 78c41f3..ce3024d 100644 --- a/gfp_test.go +++ b/gfp_test.go @@ -3,12 +3,12 @@ package bn256 import ( "crypto/rand" "encoding/binary" - "fmt" "io" "math/big" "runtime" "sync" "testing" + "time" ) // randomGF returns a random integer between 0 and p-1. @@ -121,12 +121,12 @@ func TestGFp(t *testing.T) { // goroutine is blocking on it. runtime.SetMutexProfileFraction(1) - var wg sync.WaitGroup + var ( + lock sync.Mutex + wg sync.WaitGroup + ) wg.Add(testTimes) for i := 0; i < testTimes; i++ { - // If multiple goroutines interact with a global sync pool, the - // goroutines may block on acquiring a lock. When that happens, the - // mutex profiler will traverse the stack. go func() { defer wg.Done() @@ -135,8 +135,13 @@ func TestGFp(t *testing.T) { c := &gfP{} gfpMul(c, a, b) - // Print ends up interacting with a global sync pool. - fmt.Print("") + lock.Lock() + // Make it more likely for goroutines to block on this mutex. + time.Sleep(time.Microsecond) + + // If the frame pointer was corrupted, and another goroutine is + // blocked on this mutex, then this will segfault. + lock.Unlock() }() } wg.Wait()