feat(lib/erasure): implement Go binding over rust for erasure coding (#3447)
ChainSafe · Jun 20, 2024 · 69d501c · 69d501c
1 parent 9697a6e
commit 69d501c
Show file tree

Hide file tree

Showing 11 changed files with 4,583 additions and 143 deletions.
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -77,6 +77,9 @@ jobs:
           go test -timeout=10m ./... && \
           cd ..
 
+      - name: generate a shared library file for erasure 
+        run: make compile-erasure
+
       - name: Run unit tests
         run: CI=buildjet go test -coverprofile=coverage.out -covermode=atomic -timeout=45m ./...
 

diff --git a/.gitignore b/.gitignore
@@ -26,3 +26,8 @@ tmp
 # node_modules used by polkadot.js/api tests
 tests/polkadotjs_test/node_modules
 !tests/polkadotjs_test/test/*.wasm
+
+# Ignore rust target dir
+lib/erasure/rustlib/target
+
+*.so
diff --git a/Makefile b/Makefile
@@ -151,3 +151,6 @@ endif
 
 zombienet-test: install install-zombienet
 	zombienet test -p native zombienet_tests/functional/0001-basic-network.zndsl
+
+compile-erasure:
+	cargo build --release --manifest-path=lib/erasure/rustlib/Cargo.toml
diff --git a/go.sum b/go.sum
@@ -340,6 +340,7 @@ github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuV
 github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
 github.com/klauspost/reedsolomon v1.11.8 h1:s8RpUW5TK4hjr+djiOpbZJB4ksx+TdYbRH7vHQpwPOY=
 github.com/klauspost/reedsolomon v1.11.8/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A=
+github.com/koron/go-ssdp v0.0.0-20191105050749-2e1c40ed0b5d/go.mod h1:5Ky9EC2xfoUKUor0Hjgi2BJhCSXJfMOFlmyYrVKGQMk=
 github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0=
 github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=

diff --git a/lib/erasure/README.md b/lib/erasure/README.md
@@ -0,0 +1,6 @@
+# Building the Rust erasure coding library
+
+- Build the Rust shared library from the repository root (equivalent to `make compile-erasure`):
+    ```
+    cargo build --release --manifest-path=lib/erasure/rustlib/Cargo.toml
+    ```
diff --git a/lib/erasure/erasure.go b/lib/erasure/erasure.go
@@ -1,70 +1,101 @@
-// Copyright 2021 ChainSafe Systems (ON)
+// Copyright 2023 ChainSafe Systems (ON)
 // SPDX-License-Identifier: LGPL-3.0-only
 
 package erasure
 
+// #cgo LDFLAGS: -Wl,-rpath,${SRCDIR}/rustlib/target/release -L${SRCDIR}/rustlib/target/release -lerasure
+// #include "./erasure.h"
+import (
+	"C"
+)
 import (
-	"bytes"
 	"errors"
-	"fmt"
-
-	"github.com/klauspost/reedsolomon"
+	"unsafe"
 )
 
-// ErrNotEnoughValidators cannot encode something for zero or one validator
-var ErrNotEnoughValidators = errors.New("expected at least 2 validators")
+var (
+	ErrZeroSizedData   = errors.New("data can't be zero sized")
+	ErrZeroSizedChunks = errors.New("chunks can't be zero sized")
+)
 
-// ObtainChunks obtains erasure-coded chunks, divides data into number of validatorsQty chunks and
-// creates parity chunks for reconstruction
-func ObtainChunks(validatorsQty int, data []byte) ([][]byte, error) {
-	recoveryThres, err := recoveryThreshold(validatorsQty)
-	if err != nil {
-		return nil, err
-	}
-	enc, err := reedsolomon.New(validatorsQty, recoveryThres)
-	if err != nil {
-		return nil, fmt.Errorf("creating new reed solomon failed: %w", err)
+// ObtainChunks obtains erasure-coded chunks, one for each validator.
+// It accepts the number of validators and the SCALE-encoded data. This works only up to
+// 65536 validators, and nValidators must be non-zero.
+func ObtainChunks(nValidators uint, data []byte) ([][]byte, error) {
+	if len(data) == 0 {
+		return nil, ErrZeroSizedData
 	}
-	shards, err := enc.Split(data)
-	if err != nil {
-		return nil, err
+
+	var cFlattenedChunks *C.uchar
+	var cFlattenedChunksLen C.size_t
+
+	cnValidators := C.size_t(nValidators)
+	cData := (*C.uchar)(unsafe.Pointer(&data[0]))
+	cLen := C.size_t(len(data))
+
+	cErr := C.obtain_chunks(cnValidators, cData, cLen, &cFlattenedChunks, &cFlattenedChunksLen)
+	errStr := C.GoString(cErr)
+	C.free(unsafe.Pointer(cErr))
+
+	if len(errStr) > 0 {
+		return nil, errors.New(errStr)
 	}
-	err = enc.Encode(shards)
-	if err != nil {
-		return nil, err
+
+	resData := C.GoBytes(unsafe.Pointer(cFlattenedChunks), C.int(cFlattenedChunksLen))
+	C.free(unsafe.Pointer(cFlattenedChunks))
+
+	chunkSize := uint(len(resData)) / nValidators
+	chunks := make([][]byte, nValidators)
+
+	start := uint(0)
+	for i := start; i < nValidators; i++ {
+		end := start + chunkSize
+		chunks[i] = resData[start:end]
+		start = end
 	}
 
-	return shards, nil
+	return chunks, nil
 }
 
-// Reconstruct the missing data from a set of chunks
-func Reconstruct(validatorsQty, originalDataLen int, chunks [][]byte) ([]byte, error) {
-	recoveryThres, err := recoveryThreshold(validatorsQty)
-	if err != nil {
-		return nil, err
+// Reconstruct decodable data from a set of chunks.
+//
+// The chunks are concatenated and passed to the Rust library with len(chunks[0]) as the
+// chunk size. No chunk indices are passed; presumably a chunk's position in the slice is
+// its index (TODO confirm). If too few chunks are provided, recovery is not possible.
+//
+// Works only up to 65536 validators, and nValidators must be non-zero.
+func Reconstruct(nValidators uint, chunks [][]byte) ([]byte, error) {
+	if len(chunks) == 0 {
+		return nil, ErrZeroSizedChunks
 	}
 
-	enc, err := reedsolomon.New(validatorsQty, recoveryThres)
-	if err != nil {
-		return nil, err
-	}
-	err = enc.Reconstruct(chunks)
-	if err != nil {
-		return nil, err
+	var cReconstructedData *C.uchar
+	var cReconstructedDataLen C.size_t
+	var flattenedChunks []byte
+
+	for _, chunk := range chunks {
+		flattenedChunks = append(flattenedChunks, chunk...)
 	}
-	buf := new(bytes.Buffer)
-	err = enc.Join(buf, chunks, originalDataLen)
-	return buf.Bytes(), err
-}
 
-// recoveryThreshold gives the max number of shards/chunks that we can afford to lose and still construct
-// the full initial data.  Total number of chunks will be validatorQty + recoveryThreshold
-func recoveryThreshold(validators int) (int, error) {
-	if validators <= 1 {
-		return 0, ErrNotEnoughValidators
+	cChunkSize := C.size_t(len(chunks[0]))
+	cFlattenedChunks := (*C.uchar)(unsafe.Pointer(&flattenedChunks[0]))
+	cFlattenedChunksLen := C.size_t(len(flattenedChunks))
+
+	cErr := C.reconstruct(
+		C.size_t(nValidators),
+		cFlattenedChunks, cFlattenedChunksLen,
+		cChunkSize,
+		&cReconstructedData, &cReconstructedDataLen,
+	)
+	errStr := C.GoString(cErr)
+	C.free(unsafe.Pointer(cErr))
+
+	if len(errStr) > 0 {
+		return nil, errors.New(errStr)
 	}
 
-	needed := (validators - 1) / 3
+	res := C.GoBytes(unsafe.Pointer(cReconstructedData), C.int(cReconstructedDataLen))
+	C.free(unsafe.Pointer(cReconstructedData))
 
-	return needed + 1, nil
+	return res, nil
 }
diff --git a/lib/erasure/erasure.h b/lib/erasure/erasure.h
@@ -0,0 +1,11 @@
+/*
+ * Copyright 2023 ChainSafe Systems (ON)
+ * SPDX-License-Identifier: LGPL-3.0-only
+ */
+
+#include <stdlib.h>
+#include <stddef.h>
+
+int32_t add(int32_t a, int32_t b);
+const char* obtain_chunks(size_t n_validators, unsigned char *data, size_t len, unsigned char **flattened_chunks, size_t *flattened_chunks_len);
+const char* reconstruct(size_t n_validators, unsigned char *flattened_chunks, size_t flattened_chunks_len, size_t chunk_size, unsigned char **res_data, size_t *res_len);