diff --git a/Cargo.lock b/Cargo.lock index 99e3c194e3c..91c9f572da4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -696,6 +696,15 @@ dependencies = [ "pin-project-lite 0.2.9", ] +[[package]] +name = "atomic-polyfill" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d299f547288d6db8d5c3a2916f7b2f66134b15b8c1ac1c4357dd3b8752af7bb2" +dependencies = [ + "critical-section", +] + [[package]] name = "atomic-waker" version = "1.1.0" @@ -925,6 +934,29 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d696c370c750c948ada61c69a0ee2cbbb9c50b1019ddb86d9317157a99c2cae" +[[package]] +name = "blst" +version = "0.3.10" +source = "git+https://github.com/supranational/blst.git#ca03e11a3ff24d818ae390a1e7f435f15bf72aee" +dependencies = [ + "cc", + "glob", + "threadpool", + "zeroize", +] + +[[package]] +name = "blst_from_scratch" +version = "0.1.0" +source = "git+https://github.com/sifraitech/rust-kzg?rev=7eb52ca97576ea1eefe4dd2165f224c916f8c862#7eb52ca97576ea1eefe4dd2165f224c916f8c862" +dependencies = [ + "blst", + "kzg", + "libc", + "once_cell", + "sha2 0.10.6", +] + [[package]] name = "bounded-collections" version = "0.1.5" @@ -1584,6 +1616,12 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "critical-section" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" + [[package]] name = "cross-domain-message-gossip" version = "0.1.0" @@ -3289,6 +3327,12 @@ version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "globset" version = "0.4.10" @@ -4127,6 +4171,11 
@@ dependencies = [ "parking_lot 0.12.1", ] +[[package]] +name = "kzg" +version = "0.1.0" +source = "git+https://github.com/sifraitech/rust-kzg?rev=7eb52ca97576ea1eefe4dd2165f224c916f8c862#7eb52ca97576ea1eefe4dd2165f224c916f8c862" + [[package]] name = "language-tags" version = "0.3.2" @@ -5800,6 +5849,10 @@ name = "once_cell" version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +dependencies = [ + "atomic-polyfill", + "critical-section", +] [[package]] name = "oorandom" @@ -9752,6 +9805,17 @@ dependencies = [ "uint", ] +[[package]] +name = "subspace-erasure-coding" +version = "0.1.0" +dependencies = [ + "blst_from_scratch", + "criterion", + "kzg", + "rand 0.8.5", + "subspace-core-primitives", +] + [[package]] name = "subspace-farmer" version = "0.1.0" diff --git a/crates/subspace-core-primitives/src/lib.rs b/crates/subspace-core-primitives/src/lib.rs index fd0305e7310..b032edd367f 100644 --- a/crates/subspace-core-primitives/src/lib.rs +++ b/crates/subspace-core-primitives/src/lib.rs @@ -198,20 +198,38 @@ impl From<&[u8; Self::SAFE_BYTES]> for Scalar { } } +impl From<[u8; Self::SAFE_BYTES]> for Scalar { + fn from(value: [u8; Self::SAFE_BYTES]) -> Self { + Self::from(&value) + } +} + impl From<&[u8; Self::FULL_BYTES]> for Scalar { fn from(value: &[u8; Self::FULL_BYTES]) -> Self { Scalar(Fr::from_le_bytes_mod_order(value)) } } +impl From<[u8; Self::FULL_BYTES]> for Scalar { + fn from(value: [u8; Self::FULL_BYTES]) -> Self { + Self::from(&value) + } +} + impl From<&Scalar> for [u8; Scalar::FULL_BYTES] { - fn from(value: &Scalar) -> [u8; Scalar::FULL_BYTES] { - let mut bytes = [0u8; Scalar::FULL_BYTES]; + fn from(value: &Scalar) -> Self { + let mut bytes = Self::default(); value.write_to_bytes(&mut bytes); bytes } } +impl From<Scalar> for [u8; Scalar::FULL_BYTES] { + fn from(value: Scalar) -> Self { + Self::from(&value) + } +} + impl Scalar { /// How many full bytes can 
be stored in BLS12-381 scalar (for instance before encoding). It is /// actually 254 bits, but bits are mut harder to work with and likely not worth it. diff --git a/crates/subspace-erasure-coding/Cargo.toml b/crates/subspace-erasure-coding/Cargo.toml new file mode 100644 index 00000000000..c92431019d7 --- /dev/null +++ b/crates/subspace-erasure-coding/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "subspace-erasure-coding" +description = "Polynomial erasure coding implementation used in Subspace Network" +license = "Apache-2.0" +version = "0.1.0" +authors = ["Nazar Mokrynskyi "] +edition = "2021" +include = [ + "/src", + "/Cargo.toml", +] + +[dependencies] +blst_from_scratch = { git = "https://github.com/sifraitech/rust-kzg", rev = "7eb52ca97576ea1eefe4dd2165f224c916f8c862", default-features = false } +kzg = { git = "https://github.com/sifraitech/rust-kzg", rev = "7eb52ca97576ea1eefe4dd2165f224c916f8c862", default-features = false } +subspace-core-primitives = { version = "0.1.0", path = "../subspace-core-primitives", default-features = false } + +[dev-dependencies] +criterion = "0.4.0" +rand = "0.8.5" + +[features] +default = ["std"] +std = [ + "blst_from_scratch/std", + "subspace-core-primitives/std", +] diff --git a/crates/subspace-erasure-coding/src/lib.rs b/crates/subspace-erasure-coding/src/lib.rs new file mode 100644 index 00000000000..0b58f6eea27 --- /dev/null +++ b/crates/subspace-erasure-coding/src/lib.rs @@ -0,0 +1,103 @@ +#![cfg_attr(not(feature = "std"), no_std)] + +extern crate alloc; + +#[cfg(all(test, feature = "std"))] +mod tests; + +use alloc::format; +use alloc::string::{String, ToString}; +use alloc::vec::Vec; +use blst_from_scratch::types::fft_settings::FsFFTSettings; +use blst_from_scratch::types::fr::FsFr; +use blst_from_scratch::types::poly::FsPoly; +use core::num::NonZeroUsize; +use kzg::{FFTSettings, PolyRecover, DAS}; +use subspace_core_primitives::Scalar; + +/// Erasure coding abstraction. 
+/// +/// Supports creation of parity records and recovery of missing data. +#[derive(Debug, Clone)] +pub struct ErasureCoding { + fft_settings: FsFFTSettings, +} + +impl ErasureCoding { + /// Create new erasure coding instance. + /// + /// Number of shards supported is `2^scale`, half of shards are source data and the other half + /// are parity. + pub fn new(scale: NonZeroUsize) -> Result<Self, String> { + let fft_settings = FsFFTSettings::new(scale.get())?; + + Ok(Self { fft_settings }) + } + + /// Extend sources using erasure coding. + /// + /// Returns parity data. + pub fn extend(&self, source: &[Scalar]) -> Result<Vec<Scalar>, String> { + // TODO: Once our scalars are based on `blst_from_scratch` we can use a bit of transmute to + // avoid allocation here + // TODO: das_fft_extension modifies buffer internally, it needs to change to use + // pre-allocated buffer instead of allocating a new one + let source = source + .iter() + .map(|scalar| { + FsFr::from_scalar(scalar.to_bytes()) + .map_err(|error| format!("Failed to convert scalar: {error}")) + }) + .collect::<Result<Vec<_>, String>>()?; + let parity = self + .fft_settings + .das_fft_extension(&source)? + .into_iter() + .map(|scalar| { + // This is fine, scalar is guaranteed to be correct here + Scalar::from(scalar.to_scalar()) + }) + .collect(); + + Ok(parity) + } + + /// Recovery of missing shards from given shards (at least 1/2 should be `Some`). + /// + /// Both in input and output source shards are interleaved with parity shards: + /// source, parity, source, parity, .... 
+ pub fn recover(&self, shards: &[Option]) -> Result, String> { + // TODO This is only necessary because upstream silently doesn't recover anything: + // https://github.com/sifraitech/rust-kzg/issues/195 + if shards.iter().filter(|scalar| scalar.is_some()).count() < self.fft_settings.max_width / 2 + { + return Err("Impossible to recover, too many shards are missing".to_string()); + } + // TODO: Once our scalars are based on `blst_from_scratch` we can use a bit of transmute to + // avoid allocation here + let shards = shards + .iter() + .map(|maybe_scalar| { + maybe_scalar + .map(|scalar| { + FsFr::from_scalar(scalar.into()) + .map_err(|error| format!("Failed to convert scalar: {error}")) + }) + .transpose() + }) + .collect::, _>>()?; + let poly = >::recover_poly_from_samples( + &shards, + &self.fft_settings, + )?; + + Ok(poly + .coeffs + .iter() + .map(|scalar| { + // This is fine, scalar is guaranteed to be correct here + Scalar::from(scalar.to_scalar()) + }) + .collect()) + } +} diff --git a/crates/subspace-erasure-coding/src/tests.rs b/crates/subspace-erasure-coding/src/tests.rs new file mode 100644 index 00000000000..3a80a018c22 --- /dev/null +++ b/crates/subspace-erasure-coding/src/tests.rs @@ -0,0 +1,109 @@ +use crate::ErasureCoding; +use std::iter; +use std::num::NonZeroUsize; +use subspace_core_primitives::Scalar; + +// TODO: This could have been done in-place, once implemented can be exposed as a utility +fn concatenated_to_interleaved(input: Vec) -> Vec +where + T: Clone, +{ + if input.len() <= 1 { + return input; + } + + let (first_half, second_half) = input.split_at(input.len() / 2); + + first_half + .iter() + .zip(second_half) + .flat_map(|(a, b)| [a, b]) + .cloned() + .collect() +} + +// TODO: This could have been done in-place, once implemented can be exposed as a utility +fn interleaved_to_concatenated(input: Vec) -> Vec +where + T: Clone, +{ + let first_half = input.iter().step_by(2); + let second_half = input.iter().skip(1).step_by(2); + + 
first_half.chain(second_half).cloned().collect() +} + +#[test] +fn basic() { + let scale = NonZeroUsize::new(8).unwrap(); + let num_shards = 2usize.pow(scale.get() as u32); + let ec = ErasureCoding::new(scale).unwrap(); + + let source_shards = (0..num_shards / 2) + .map(|_| rand::random::<[u8; Scalar::SAFE_BYTES]>()) + .map(Scalar::from) + .collect::<Vec<_>>(); + + let parity_shards = ec.extend(&source_shards).unwrap(); + + assert_ne!(source_shards, parity_shards); + + let partial_shards = concatenated_to_interleaved( + iter::repeat(None) + .take(num_shards / 4) + .chain(source_shards.iter().skip(num_shards / 4).copied().map(Some)) + .chain(parity_shards.iter().take(num_shards / 4).copied().map(Some)) + .chain(iter::repeat(None).take(num_shards / 4)) + .collect::<Vec<_>>(), + ); + + let recovered = interleaved_to_concatenated(ec.recover(&partial_shards).unwrap()); + + assert_eq!( + recovered, + source_shards + .iter() + .chain(&parity_shards) + .copied() + .collect::<Vec<_>>() + ); +} + +#[test] +fn bad_shards_number() { + let scale = NonZeroUsize::new(8).unwrap(); + let num_shards = 2usize.pow(scale.get() as u32); + let ec = ErasureCoding::new(scale).unwrap(); + + let source_shards = vec![Default::default(); num_shards - 1]; + + assert!(ec.extend(&source_shards).is_err()); + + let partial_shards = vec![Default::default(); num_shards - 1]; + assert!(ec.recover(&partial_shards).is_err()); +} + +#[test] +fn not_enough_partial() { + let scale = NonZeroUsize::new(8).unwrap(); + let num_shards = 2usize.pow(scale.get() as u32); + let ec = ErasureCoding::new(scale).unwrap(); + + let mut partial_shards = vec![None; num_shards]; + + // Less than half is not sufficient + partial_shards + .iter_mut() + .take(num_shards / 2 - 1) + .for_each(|maybe_scalar| { + maybe_scalar.replace(Scalar::default()); + }); + assert!(ec.recover(&partial_shards).is_err()); + + // Any half is sufficient + partial_shards + .last_mut() + .unwrap() + .replace(Scalar::default()); + 
assert!(ec.recover(&partial_shards).is_ok()); +}