diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 46d9a69..a38c11f 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -402,6 +402,10 @@ impl BaseLayerFileBuilderPhase2 { self.indexed_properties_builder.add_triples(triples) } + pub fn set_index_lengths>(&mut self, lengths: I) { + self.indexed_properties_builder.set_lengths(lengths); + } + pub(crate) async fn partial_finalize(self) -> io::Result> { self.triple_builder.finalize().await?; if let Some(indexed_properties_collection_bufs) = self.indexed_properties_builder.finalize() diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index a76af62..9fe4c6f 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -532,6 +532,10 @@ impl ChildLayerFileBuil self.indexed_properties_builder.add_triples(triples) } + pub fn set_index_lengths>(&mut self, lengths: I) { + self.indexed_properties_builder.set_lengths(lengths); + } + /// Remove the given triple. /// /// This will panic if a greater triple has already been removed, diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index a0a6103..e996c18 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -956,6 +956,9 @@ impl Layer for InternalLayer { fn indexed_property_si(&self, subject: u64, index: usize) -> Option { let mut cur = self; loop { + if (cur.node_and_value_count() as u64) < subject { + break; + } if let Some(ip) = cur.indexed_property_collection() { if let Some(object) = ip.lookup_index(subject, index) { return Some(IndexIdTriple::new(subject, index, object)); @@ -973,9 +976,40 @@ impl Layer for InternalLayer { } None } - fn indexed_property_s(&self, subject: u64) -> Box + Send> { - todo!(); + fn indexed_property_s<'a>( + &'a self, + subject: u64, + ) -> Box + Send + 'a> { + let mut iters = Vec::new(); + let mut cur = self; + loop { + if (cur.node_and_value_count() as u64) < subject { + break; + } + if let Some(collection) = 
cur.indexed_property_collection() { + if let Some(indexes_iter) = collection.indexes_for(subject) { + iters.push(indexes_iter); + } + } + if let Some(parent) = cur.immediate_parent() { + cur = parent; + } else { + break; + } + } + + let mut seen = HashSet::new(); + Box::new(iters.into_iter().flatten().filter_map(move |(p, o)| { + if seen.contains(&p) { + // already seen, so skip + None + } else { + seen.insert(p); + Some(IndexIdTriple::new(subject, p, o)) + } + })) } + fn indexed_properties(&self) -> Box + Send> { todo!(); } diff --git a/src/layer/layer.rs b/src/layer/layer.rs index 0e12bb5..40c5822 100644 --- a/src/layer/layer.rs +++ b/src/layer/layer.rs @@ -168,7 +168,10 @@ pub trait Layer: Send + Sync { fn indexed_property_exists(&self, subject: u64, index: usize) -> bool { self.indexed_property_si(subject, index).is_some() } - fn indexed_property_s(&self, subject: u64) -> Box + Send>; + fn indexed_property_s<'a>( + &'a self, + subject: u64, + ) -> Box + Send + 'a>; fn indexed_properties(&self) -> Box + Send>; } @@ -231,13 +234,6 @@ pub struct ValueTriple { pub object: ObjectType, } -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct IndexValueTriple { - pub subject: String, - pub index: usize, - pub object: ObjectType, -} - impl ValueTriple { /// Construct a triple with a node object. /// @@ -282,6 +278,48 @@ impl ValueTriple { } } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct IndexValueTriple { + pub subject: String, + pub index: usize, + pub object: ObjectType, +} + +impl IndexValueTriple { + /// Construct a triple with a node object. + /// + /// Nodes may appear in both the subject and object position. + pub fn new_node(subject: &str, index: usize, object: &str) -> IndexValueTriple { + IndexValueTriple { + subject: subject.to_owned(), + index, + object: ObjectType::Node(object.to_owned()), + } + } + + /// Construct a triple with a value object. + /// + /// Values may only appear in the object position. 
+ pub fn new_value(subject: &str, index: usize, object: TypedDictEntry) -> IndexValueTriple { + IndexValueTriple { + subject: subject.to_owned(), + index, + object: ObjectType::Value(object), + } + } + + /// Construct a triple with a string value object. + /// + /// Values may only appear in the object position. + pub fn new_string_value(subject: &str, index: usize, object: &str) -> IndexValueTriple { + IndexValueTriple { + subject: subject.to_owned(), + index, + object: ObjectType::Value(String::make_entry(&object)), + } + } +} + /// Either a resolved id or an unresolved inner type. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum PossiblyResolved { diff --git a/src/layer/simple_builder.rs b/src/layer/simple_builder.rs index d497622..7da093f 100644 --- a/src/layer/simple_builder.rs +++ b/src/layer/simple_builder.rs @@ -40,6 +40,7 @@ pub trait LayerBuilder: Send + Sync { fn remove_id_triple(&mut self, triple: IdTriple); fn set_index_value_triple(&mut self, triple: IndexValueTriple); fn set_index_id_triple(&mut self, triple: IndexIdTriple); + fn set_index_len(&mut self, subject: u64, len: usize); /// Commit the layer to storage fn commit(self) -> Pin> + Send>>; /// Commit a boxed layer to storage @@ -57,6 +58,7 @@ pub struct SimpleLayerBuilder { files: LayerFiles, id_additions: Vec, id_removals: Vec, + index_lengths: Vec<(u64, usize)>, index_id_additions: Vec, nodes_values_map: HashMap, @@ -77,6 +79,7 @@ impl SimpleLayerBuilder { files: LayerFiles::Base(files), id_additions: Vec::with_capacity(0), id_removals: Vec::with_capacity(0), + index_lengths: Vec::with_capacity(0), index_id_additions: Vec::with_capacity(0), nodes_values_map: HashMap::new(), @@ -99,6 +102,7 @@ impl SimpleLayerBuilder { files: LayerFiles::Child(files), id_additions: Vec::new(), id_removals: Vec::new(), + index_lengths: Vec::new(), index_id_additions: Vec::new(), nodes_values_map: HashMap::new(), @@ -245,12 +249,17 @@ impl LayerBuilder for SimpleLayerBuil 
self.index_id_additions.push(triple); } + fn set_index_len(&mut self, subject: u64, len: usize) { + self.index_lengths.push((subject, len)); + } + fn commit(self) -> Pin> + Send>> { let SimpleLayerBuilder { parent, files, mut id_additions, mut id_removals, + mut index_lengths, mut index_id_additions, nodes_values_map, @@ -275,6 +284,8 @@ impl LayerBuilder for SimpleLayerBuil id_removals.shrink_to_fit(); index_id_additions.sort(); index_id_additions.dedup(); + index_lengths.sort(); + index_lengths.dedup(); // we now need to figure out noops. let mut additions_it = id_additions.iter_mut().peekable(); @@ -461,6 +472,7 @@ impl LayerBuilder for SimpleLayerBuil builder.remove_id_triples(id_removals).await?; // TODO: while setting index triples for this layer, we need to make sure we're not duplicating assignments that were already present from the parent. This is currently not yet done bedcause the query logic has not been written yet. builder.set_index_triples(index_id_additions); + builder.set_index_lengths(index_lengths); builder.finalize().await } @@ -477,6 +489,7 @@ impl LayerBuilder for SimpleLayerBuil builder.add_id_triples(id_additions).await?; builder.set_index_triples(index_id_additions); + builder.set_index_lengths(index_lengths); builder.finalize().await } diff --git a/src/store/mod.rs b/src/store/mod.rs index 3a6365b..9fcb27b 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -7,7 +7,8 @@ use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock}; use crate::layer::{ - IdTriple, IndexIdTriple, Layer, LayerBuilder, LayerCounts, ObjectType, ValueTriple, + IdTriple, IndexIdTriple, IndexValueTriple, Layer, LayerBuilder, LayerCounts, ObjectType, + ValueTriple, }; use crate::storage::archive::{ArchiveLayerStore, DirectoryArchiveBackend, LruArchiveBackend}; use crate::storage::directory::{DirectoryLabelStore, DirectoryLayerStore}; @@ -114,6 +115,18 @@ impl StoreLayerBuilder { self.with_builder(move |b| b.remove_id_triple(triple)) } + pub fn 
set_index_value_triple(&self, triple: IndexValueTriple) -> Result<(), io::Error> { + self.with_builder(move |b| b.set_index_value_triple(triple)) + } + + pub fn set_index_id_triple(&self, triple: IndexIdTriple) -> Result<(), io::Error> { + self.with_builder(move |b| b.set_index_id_triple(triple)) + } + + pub fn set_index_len(&self, subject: u64, len: usize) -> Result<(), io::Error> { + self.with_builder(move |b| b.set_index_len(subject, len)) + } + /// Returns true if this layer has been committed, and false otherwise. pub fn committed(&self) -> bool { self.builder @@ -677,7 +690,10 @@ impl Layer for StoreLayer { fn indexed_property_si(&self, subject: u64, index: usize) -> Option { self.layer.indexed_property_si(subject, index) } - fn indexed_property_s(&self, subject: u64) -> Box + Send> { + fn indexed_property_s<'a>( + &'a self, + subject: u64, + ) -> Box + Send + 'a> { self.layer.indexed_property_s(subject) } fn indexed_properties(&self) -> Box + Send> { diff --git a/src/store/sync.rs b/src/store/sync.rs index e30f318..9f2547e 100644 --- a/src/store/sync.rs +++ b/src/store/sync.rs @@ -11,7 +11,9 @@ use tokio::runtime::Runtime; use std::io; use std::path::{Path, PathBuf}; -use crate::layer::{IdTriple, IndexIdTriple, Layer, LayerCounts, ObjectType, ValueTriple}; +use crate::layer::{ + IdTriple, IndexIdTriple, IndexValueTriple, Layer, LayerCounts, ObjectType, ValueTriple, +}; use crate::store::{ open_directory_store, open_memory_store, NamedGraph, Store, StoreLayer, StoreLayerBuilder, }; @@ -76,6 +78,18 @@ impl SyncStoreLayerBuilder { self.inner.remove_id_triple(triple) } + pub fn set_index_value_triple(&self, triple: IndexValueTriple) -> Result<(), io::Error> { + self.inner.set_index_value_triple(triple) + } + + pub fn set_index_id_triple(&self, triple: IndexIdTriple) -> Result<(), io::Error> { + self.inner.set_index_id_triple(triple) + } + + pub fn set_index_len(&self, subject: u64, len: usize) -> Result<(), io::Error> { + self.inner.set_index_len(subject, len) + 
} + /// Returns a boolean result which is true if this builder has been committed, and false otherwise. pub fn committed(&self) -> bool { self.inner.committed() @@ -456,7 +470,10 @@ impl Layer for SyncStoreLayer { fn indexed_property_si(&self, subject: u64, index: usize) -> Option { self.inner.indexed_property_si(subject, index) } - fn indexed_property_s(&self, subject: u64) -> Box + Send> { + fn indexed_property_s<'a>( + &'a self, + subject: u64, + ) -> Box + Send + 'a> { self.inner.indexed_property_s(subject) } fn indexed_properties(&self) -> Box + Send> { diff --git a/src/structure/indexed_property.rs b/src/structure/indexed_property.rs index 60fb82b..0bcb4c3 100644 --- a/src/structure/indexed_property.rs +++ b/src/structure/indexed_property.rs @@ -43,6 +43,12 @@ impl IndexedPropertyBuilder { self.added.push((subject_id, 0, len as u64)); } + pub fn set_lengths>(&mut self, lengths: I) { + for (subject, len) in lengths { + self.set_len(subject, len); + } + } + pub fn remove_from(&mut self, subject_id: u64) { self.set_len(subject_id, 0); } @@ -143,16 +149,19 @@ impl IndexedPropertyCollection { self.lookup_index0(subject, 0).map(|n| n as usize) } - pub fn indexes_for<'a>(&'a self, subject: u64) -> impl Iterator + 'a { + pub fn indexes_for<'a>( + &'a self, + subject: u64, + ) -> Option + 'a> { if let Some(subject_index) = self.subjects.index_of(subject) { let subject_index = subject_index + 1; let offset = self.adjacencies.offset_for(subject_index as u64) + 1; let indexes = self.adjacencies.get(subject_index as u64); - itertools::Either::Left(indexes.iter().skip(1).enumerate().map(move |(ix_ix, ix)| { + Some(indexes.iter().skip(1).enumerate().map(move |(ix_ix, ix)| { (ix as usize - 1, self.objects.entry(ix_ix + offset as usize)) })) } else { - itertools::Either::Right(std::iter::empty()) + None } } @@ -195,11 +204,14 @@ mod tests { assert_eq!(Some(21), collection.lookup_index(5, 1)); assert_eq!(None, collection.lookup_index(5, 2)); - assert_eq!(vec![(1, 21)], 
collection.indexes_for(5).collect::>()); + assert_eq!( + vec![(1, 21)], + collection.indexes_for(5).unwrap().collect::>() + ); assert_eq!( vec![(4, 42), (7, 420)], - collection.indexes_for(3).collect::>() + collection.indexes_for(3).unwrap().collect::>() ); assert_eq!( vec![(3, 4, 42), (3, 7, 420), (5, 1, 21)],