uses LruCache instead of InversionTree for caching data decode matrices
Current implementation of LRU eviction policy on InversionTree is wrong
and inefficient.
The commit removes InversionTree and instead uses LruCache to cache data
decode matrices.
behzadnouri committed Sep 3, 2022
1 parent eb1f66f commit f1adaa9
Showing 4 changed files with 40 additions and 542 deletions.
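
Before the file diffs, a minimal standalone sketch of the caching pattern this commit adopts (illustrative only: DecodeMatrixCache, DecodeMatrix and get_or_insert_with are made-up names, and a plain Vec<Vec<u8>> stands in for the crate's Matrix<F>). The cache is an lru::LruCache keyed by the indices of the missing shards, wrapped in a Mutex, with values shared through Arc so a hit just clones a pointer:

// Illustrative sketch only: mirrors the lock/probe, compute, lock/insert flow
// of get_data_decode_matrix in the src/core.rs hunk below. DecodeMatrix is a
// stand-in for the crate's Matrix<F>; the struct and method names are made up.
use std::sync::Arc;

use lru::LruCache;      // lru 0.7: LruCache::new takes a plain usize capacity
use parking_lot::Mutex; // non-poisoning mutex, as in the crate's `std` build

type DecodeMatrix = Vec<Vec<u8>>;

struct DecodeMatrixCache {
    cache: Mutex<LruCache<Vec<usize>, Arc<DecodeMatrix>>>,
}

impl DecodeMatrixCache {
    fn new(capacity: usize) -> Self {
        Self { cache: Mutex::new(LruCache::new(capacity)) }
    }

    fn get_or_insert_with(
        &self,
        invalid_indices: &[usize],
        compute: impl FnOnce() -> DecodeMatrix,
    ) -> Arc<DecodeMatrix> {
        {
            // Probe the cache; the guard is dropped before any heavy work.
            let mut cache = self.cache.lock();
            if let Some(entry) = cache.get(invalid_indices) {
                return entry.clone();
            }
        }
        // Miss: build the matrix outside the lock, then insert it.
        let matrix = Arc::new(compute());
        self.cache.lock().put(invalid_indices.to_vec(), matrix.clone());
        matrix
    }
}

fn main() {
    let cache = DecodeMatrixCache::new(254);
    let m = cache.get_or_insert_with(&[1, 4], || vec![vec![0u8; 4]; 4]);
    // A second lookup with the same missing-shard indices is a cache hit.
    let again = cache.get_or_insert_with(&[1, 4], || unreachable!());
    assert!(Arc::ptr_eq(&m, &again));
}

The same lock/probe, compute, lock/insert shape appears inline in get_data_decode_matrix in the src/core.rs diff below; the real code simply does not factor it into a helper.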
1 change: 1 addition & 0 deletions Cargo.toml
@@ -43,6 +43,7 @@ parking_lot = { version = "0.11.2", optional = true }
smallvec = "1.2"
# `Mutex` implementation for `no_std` environment with the same high-level API as `parking_lot`
spin = { version = "0.9.2", default-features = false, features = ["spin_mutex"] }
+lru = "0.7.7"

[dev-dependencies]
rand = "0.7.2"
75 changes: 39 additions & 36 deletions src/core.rs
@@ -9,12 +9,20 @@ use smallvec::SmallVec;
use crate::errors::Error;
use crate::errors::SBSError;

-use crate::inversion_tree::InversionTree;
use crate::matrix::Matrix;

+use lru::LruCache;
+
+#[cfg(feature = "std")]
+use parking_lot::Mutex;
+#[cfg(not(feature = "std"))]
+use spin::Mutex;
+
use super::Field;
use super::ReconstructShard;

+const DATA_DECODE_MATRIX_CACHE_CAPACITY: usize = 254;
+
// /// Parameters for parallelism.
// #[derive(PartialEq, Debug, Clone, Copy)]
// pub struct ParallelParam {
@@ -338,7 +346,7 @@ pub struct ReedSolomon<F: Field> {
    parity_shard_count: usize,
    total_shard_count: usize,
    matrix: Matrix<F>,
-    tree: InversionTree<F>,
+    data_decode_matrix_cache: Mutex<LruCache<Vec<usize>, Arc<Matrix<F>>>>,
}

impl<F: Field> Clone for ReedSolomon<F> {
@@ -454,7 +462,7 @@ impl<F: Field> ReedSolomon<F> {
            parity_shard_count: parity_shards,
            total_shard_count: total_shards,
            matrix,
-            tree: InversionTree::new(data_shards, parity_shards),
+            data_decode_matrix_cache: Mutex::new(LruCache::new(DATA_DECODE_MATRIX_CACHE_CAPACITY)),
        })
    }

@@ -691,40 +699,35 @@ impl<F: Field> ReedSolomon<F> {
        valid_indices: &[usize],
        invalid_indices: &[usize],
    ) -> Arc<Matrix<F>> {
-        // Attempt to get the cached inverted matrix out of the tree
-        // based on the indices of the invalid rows.
-        match self.tree.get_inverted_matrix(&invalid_indices) {
-            // If the inverted matrix isn't cached in the tree yet we must
-            // construct it ourselves and insert it into the tree for the
-            // future. In this way the inversion tree is lazily loaded.
-            None => {
-                // Pull out the rows of the matrix that correspond to the
-                // shards that we have and build a square matrix. This
-                // matrix could be used to generate the shards that we have
-                // from the original data.
-                let mut sub_matrix = Matrix::new(self.data_shard_count, self.data_shard_count);
-                for (sub_matrix_row, &valid_index) in valid_indices.iter().enumerate() {
-                    for c in 0..self.data_shard_count {
-                        sub_matrix.set(sub_matrix_row, c, self.matrix.get(valid_index, c));
-                    }
-                }
-                // Invert the matrix, so we can go from the encoded shards
-                // back to the original data. Then pull out the row that
-                // generates the shard that we want to decode. Note that
-                // since this matrix maps back to the original data, it can
-                // be used to create a data shard, but not a parity shard.
-                let data_decode_matrix = Arc::new(sub_matrix.invert().unwrap());
-
-                // Cache the inverted matrix in the tree for future use keyed on the
-                // indices of the invalid rows.
-                self.tree
-                    .insert_inverted_matrix(&invalid_indices, &data_decode_matrix)
-                    .unwrap();
-
-                data_decode_matrix
-            }
-            Some(m) => m,
-        }
+        {
+            let mut cache = self.data_decode_matrix_cache.lock();
+            if let Some(entry) = cache.get(invalid_indices) {
+                return entry.clone();
+            }
+        }
+        // Pull out the rows of the matrix that correspond to the shards that
+        // we have and build a square matrix. This matrix could be used to
+        // generate the shards that we have from the original data.
+        let mut sub_matrix = Matrix::new(self.data_shard_count, self.data_shard_count);
+        for (sub_matrix_row, &valid_index) in valid_indices.iter().enumerate() {
+            for c in 0..self.data_shard_count {
+                sub_matrix.set(sub_matrix_row, c, self.matrix.get(valid_index, c));
+            }
+        }
+        // Invert the matrix, so we can go from the encoded shards back to the
+        // original data. Then pull out the row that generates the shard that
+        // we want to decode. Note that since this matrix maps back to the
+        // original data, it can be used to create a data shard, but not a
+        // parity shard.
+        let data_decode_matrix = Arc::new(sub_matrix.invert().unwrap());
+        // Cache the inverted matrix for future use keyed on the indices of the
+        // invalid rows.
+        {
+            let data_decode_matrix = data_decode_matrix.clone();
+            let mut cache = self.data_decode_matrix_cache.lock();
+            cache.put(Vec::from(invalid_indices), data_decode_matrix);
+        }
+        data_decode_matrix
    }

    fn reconstruct_internal<T: ReconstructShard<F>>(
@@ -780,7 +783,7 @@ impl<F: Field> ReedSolomon<F> {
//
// The valid indices are used to construct the data decode matrix,
// the invalid indices are used to key the data decode matrix
-// in the inversion tree.
+// in the data decode matrix cache.
//
// We only need exactly N valid indices, where N = `data_shard_count`,
// as the data decode matrix is a N x N matrix, thus only needs
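
For reference, a short sketch of the code path that exercises this cache, using the crate's public API as shown in its README (ReedSolomon::new, encode and reconstruct from the galois_8 module; the shard contents here are arbitrary). A reconstruction with a given pattern of missing shards keys the data decode matrix on the resulting invalid indices, so repeatedly losing the same shards now hits the LruCache instead of walking the removed InversionTree:

use reed_solomon_erasure::galois_8::ReedSolomon;

fn main() {
    // 3 data shards + 2 parity shards.
    let r = ReedSolomon::new(3, 2).unwrap();

    let mut shards: Vec<Vec<u8>> = vec![
        vec![0, 1, 2, 3],
        vec![4, 5, 6, 7],
        vec![8, 9, 10, 11],
        vec![0; 4], // parity, filled in by encode
        vec![0; 4], // parity, filled in by encode
    ];
    r.encode(&mut shards).unwrap();

    // Drop two shards and reconstruct; the indices of the missing shards
    // key the data decode matrix cache.
    let mut maybe_shards: Vec<Option<Vec<u8>>> =
        shards.iter().cloned().map(Some).collect();
    maybe_shards[0] = None;
    maybe_shards[4] = None;
    r.reconstruct(&mut maybe_shards).unwrap();

    let recovered: Vec<Vec<u8>> =
        maybe_shards.into_iter().map(|s| s.unwrap()).collect();
    assert_eq!(recovered, shards);
}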

0 comments on commit f1adaa9
