-
Notifications
You must be signed in to change notification settings - Fork 246
kvdb-rocksdb: optimize and rename iter_from_prefix #365
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c85a3da
727158f
2fb3993
a8681b1
3b98f77
1acaefc
ddb330e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,8 +12,7 @@ | |
| //! | ||
| //! Note: this crate does not use "Prefix Seek" mode which means that the prefix iterator | ||
| //! will return keys not starting with the given prefix as well (as long as `key >= prefix`). | ||
| //! To work around this we filter the data returned by rocksdb to ensure that | ||
| //! all data yielded by the iterator does start with the given prefix. | ||
| //! To work around this we set an upper bound to the prefix successor. | ||
| //! See https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes for details. | ||
|
|
||
| use crate::DBAndColumns; | ||
|
|
@@ -28,6 +27,13 @@ pub type KeyValuePair = (Box<[u8]>, Box<[u8]>); | |
| /// Iterator with built-in synchronization. | ||
| pub struct ReadGuardedIterator<'a, I, T> { | ||
| inner: OwningHandle<UnsafeStableAddress<'a, Option<T>>, DerefWrapper<Option<I>>>, | ||
| // We store the upper bound here | ||
| // to make sure it lives at least as long as the iterator. | ||
| // See https://github.com/rust-rocksdb/rust-rocksdb/pull/309. | ||
| // TODO: remove this once https://github.com/rust-rocksdb/rust-rocksdb/pull/377 | ||
| // is merged and released. | ||
| #[allow(dead_code)] | ||
| upper_bound_prefix: Option<Box<[u8]>>, | ||
| } | ||
|
|
||
| // We can't implement `StableAddress` for a `RwLockReadGuard` | ||
|
|
@@ -81,8 +87,8 @@ pub trait IterationHandler { | |
| /// Create an `Iterator` over a `ColumnFamily` corresponding to the passed index. Takes a | ||
| /// reference to a `ReadOptions` to allow configuration of the new iterator (see | ||
| /// https://github.com/facebook/rocksdb/blob/master/include/rocksdb/options.h#L1169). | ||
| /// The iterator starts from the first key having the provided `prefix`. | ||
| fn iter_from_prefix(&self, col: u32, prefix: &[u8], read_opts: &ReadOptions) -> Self::Iterator; | ||
| /// The `Iterator` iterates over keys which start with the provided `prefix`. | ||
| fn iter_with_prefix(&self, col: u32, prefix: &[u8], read_opts: &ReadOptions) -> Self::Iterator; | ||
| } | ||
|
|
||
| impl<'a, T> ReadGuardedIterator<'a, <&'a T as IterationHandler>::Iterator, T> | ||
|
|
@@ -92,18 +98,22 @@ where | |
| /// Creates a new `ReadGuardedIterator` that maps `RwLock<RocksDB>` to `RwLock<DBIterator>`, | ||
| /// where `DBIterator` iterates over all keys. | ||
| pub fn new(read_lock: RwLockReadGuard<'a, Option<T>>, col: u32, read_opts: &ReadOptions) -> Self { | ||
| Self { inner: Self::new_inner(read_lock, |db| db.iter(col, read_opts)) } | ||
| Self { inner: Self::new_inner(read_lock, |db| db.iter(col, read_opts)), upper_bound_prefix: None } | ||
| } | ||
|
|
||
| /// Creates a new `ReadGuardedIterator` that maps `RwLock<RocksDB>` to `RwLock<DBIterator>`, | ||
| /// where `DBIterator` iterates over keys >= prefix. | ||
| pub fn new_from_prefix( | ||
| /// where `DBIterator` iterates over keys which start with the given prefix. | ||
| pub fn new_with_prefix( | ||
| read_lock: RwLockReadGuard<'a, Option<T>>, | ||
| col: u32, | ||
| prefix: &[u8], | ||
| upper_bound: Box<[u8]>, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Small nit, I would have put the calculation to end_prefix (or the call to a function doing the calculation) inside this function to avoid this additional parameter in the api.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree the current internal API is cumbersome, but we also pass here |
||
| read_opts: &ReadOptions, | ||
| ) -> Self { | ||
| Self { inner: Self::new_inner(read_lock, |db| db.iter_from_prefix(col, prefix, read_opts)) } | ||
| Self { | ||
| inner: Self::new_inner(read_lock, |db| db.iter_with_prefix(col, prefix, read_opts)), | ||
| upper_bound_prefix: Some(upper_bound), | ||
| } | ||
| } | ||
|
|
||
| fn new_inner( | ||
|
|
@@ -126,7 +136,7 @@ impl<'a> IterationHandler for &'a DBAndColumns { | |
| .expect("iterator params are valid; qed") | ||
| } | ||
|
|
||
| fn iter_from_prefix(&self, col: u32, prefix: &[u8], read_opts: &ReadOptions) -> Self::Iterator { | ||
| fn iter_with_prefix(&self, col: u32, prefix: &[u8], read_opts: &ReadOptions) -> Self::Iterator { | ||
| self.db | ||
| .iterator_cf_opt(self.cf(col as usize), read_opts, IteratorMode::From(prefix, Direction::Forward)) | ||
| .expect("iterator params are valid; qed") | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -444,13 +444,13 @@ impl Database { | |||
| batch.delete_cf(cf, &key).map_err(other_io_err)? | ||||
| } | ||||
| DBOp::DeletePrefix { col: _, prefix } => { | ||||
| if prefix.len() > 0 { | ||||
| if !prefix.is_empty() { | ||||
| let end_range = kvdb::end_prefix(&prefix[..]); | ||||
| batch.delete_range_cf(cf, &prefix[..], &end_range[..]).map_err(other_io_err)?; | ||||
| } else { | ||||
| // Deletes all values in the column. | ||||
| let end_range = &[u8::max_value()]; | ||||
| batch.delete_range_cf(cf, &prefix[..], &end_range[..]).map_err(other_io_err)?; | ||||
| batch.delete_range_cf(cf, &[][..], &end_range[..]).map_err(other_io_err)?; | ||||
| batch.delete_cf(cf, &end_range[..]).map_err(other_io_err)?; | ||||
| } | ||||
| } | ||||
|
|
@@ -492,7 +492,7 @@ impl Database { | |||
|
|
||||
| /// Get value by partial key. Prefix size should match configured prefix size. | ||||
| pub fn get_by_prefix(&self, col: u32, prefix: &[u8]) -> Option<Box<[u8]>> { | ||||
| self.iter_from_prefix(col, prefix).next().map(|(_, v)| v) | ||||
| self.iter_with_prefix(col, prefix).next().map(|(_, v)| v) | ||||
| } | ||||
|
|
||||
| /// Iterator over the data in the given database column index. | ||||
|
|
@@ -512,18 +512,26 @@ impl Database { | |||
| /// Iterator over data in the `col` database column index matching the given prefix. | ||||
| /// Will hold a lock until the iterator is dropped | ||||
| /// preventing the database from being closed. | ||||
| fn iter_from_prefix<'a>(&'a self, col: u32, prefix: &'a [u8]) -> impl Iterator<Item = iter::KeyValuePair> + 'a { | ||||
| fn iter_with_prefix<'a>(&'a self, col: u32, prefix: &'a [u8]) -> impl Iterator<Item = iter::KeyValuePair> + 'a { | ||||
| let read_lock = self.db.read(); | ||||
| let optional = if read_lock.is_some() { | ||||
| let guarded = iter::ReadGuardedIterator::new_from_prefix(read_lock, col, prefix, &self.read_opts); | ||||
| let mut read_opts = ReadOptions::default(); | ||||
| read_opts.set_verify_checksums(false); | ||||
| let end_prefix = kvdb::end_prefix(prefix).into_boxed_slice(); | ||||
| // rocksdb doesn't work with an empty upper bound | ||||
| if !end_prefix.is_empty() { | ||||
|
Comment on lines
+521
to
+522
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cc @cheme, we might want to re-review prefix deletion code as rocksdb doesn't iterate at all if the upper bound is empty as it matches all keys
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I should have remember that, there was some similar things in the deletion prefix pr, let me check back: parity-common/kvdb-rocksdb/src/lib.rs Line 447 in dd89c9a
Is there a case where we got a empty upper bound and the start bound is not empty, ok [255] or [255, 255] ... So yes the test should be rewrite on the result of end prefix instead of the prefix 👍 I am doing a quick PR. |
||||
| // SAFETY: the end_prefix lives as long as the iterator | ||||
| // See `ReadGuardedIterator` definition for more details. | ||||
| unsafe { | ||||
| read_opts.set_iterate_upper_bound(&end_prefix); | ||||
| } | ||||
| } | ||||
| let guarded = iter::ReadGuardedIterator::new_with_prefix(read_lock, col, prefix, end_prefix, &read_opts); | ||||
| Some(guarded) | ||||
| } else { | ||||
| None | ||||
| }; | ||||
| // We're not using "Prefix Seek" mode, so the iterator will return | ||||
| // keys not starting with the given prefix as well, | ||||
| // see https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes | ||||
| optional.into_iter().flat_map(identity).take_while(move |(k, _)| k.starts_with(prefix)) | ||||
| optional.into_iter().flat_map(identity) | ||||
| } | ||||
|
|
||||
| /// Close the database | ||||
|
|
@@ -648,8 +656,8 @@ impl KeyValueDB for Database { | |||
| Box::new(unboxed.into_iter()) | ||||
| } | ||||
|
|
||||
| fn iter_from_prefix<'a>(&'a self, col: u32, prefix: &'a [u8]) -> Box<dyn Iterator<Item = KeyValuePair> + 'a> { | ||||
| let unboxed = Database::iter_from_prefix(self, col, prefix); | ||||
| fn iter_with_prefix<'a>(&'a self, col: u32, prefix: &'a [u8]) -> Box<dyn Iterator<Item = KeyValuePair> + 'a> { | ||||
| let unboxed = Database::iter_with_prefix(self, col, prefix); | ||||
| Box::new(unboxed.into_iter()) | ||||
| } | ||||
|
|
||||
|
|
@@ -729,9 +737,9 @@ mod tests { | |||
| } | ||||
|
|
||||
| #[test] | ||||
| fn iter_from_prefix() -> io::Result<()> { | ||||
| fn iter_with_prefix() -> io::Result<()> { | ||||
| let db = create(1)?; | ||||
| st::test_iter_from_prefix(&db) | ||||
| st::test_iter_with_prefix(&db) | ||||
| } | ||||
|
|
||||
| #[test] | ||||
|
|
@@ -742,7 +750,7 @@ mod tests { | |||
|
|
||||
| #[test] | ||||
| fn stats() -> io::Result<()> { | ||||
| let db = create(st::IOSTATS_NUM_COLUMNS)?; | ||||
| let db = create(st::IO_STATS_NUM_COLUMNS)?; | ||||
| st::test_io_stats(&db) | ||||
| } | ||||
|
|
||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also see rust-rocksdb/rust-rocksdb#377