diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 8a984094..1233556f 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -9,6 +9,7 @@ use super::super::builder::*; use super::super::id_map::*; use super::super::layer::*; use crate::layer::InternalLayer; +use crate::structure::indexed_property::{IndexedPropertyBuilder, IndexedPropertyCollection}; use crate::structure::*; use crate::{chrono_log, storage::*}; @@ -38,6 +39,8 @@ pub struct BaseLayer { pub(super) o_ps_adjacency_list: AdjacencyList, pub(super) predicate_wavelet_tree: WaveletTree, + + pub(super) indexed_property_collection: Option, } impl BaseLayer { @@ -119,6 +122,10 @@ impl BaseLayer { predicate_wavelet_tree_width, ); + let indexed_property_collection = maps + .index_property_maps + .map(|m| IndexedPropertyCollection::from_buffers(m.into())); + InternalLayer::Base(BaseLayer { name, node_dictionary, @@ -137,6 +144,8 @@ impl BaseLayer { o_ps_adjacency_list, predicate_wavelet_tree, + + indexed_property_collection, }) } } @@ -324,7 +333,8 @@ impl BaseLayerFileBuilder { pub struct BaseLayerFileBuilderPhase2 { files: BaseLayerFiles, - builder: TripleFileBuilder, + triple_builder: TripleFileBuilder, + indexed_properties_builder: IndexedPropertyBuilder, } impl BaseLayerFileBuilderPhase2 { @@ -335,7 +345,7 @@ impl BaseLayerFileBuilderPhase2 { num_predicates: usize, num_values: usize, ) -> io::Result { - let builder = TripleFileBuilder::new( + let triple_builder = TripleFileBuilder::new( files.s_p_adjacency_list_files.clone(), files.sp_o_adjacency_list_files.clone(), num_nodes, @@ -345,7 +355,13 @@ impl BaseLayerFileBuilderPhase2 { ) .await?; - Ok(BaseLayerFileBuilderPhase2 { files, builder }) + let indexed_properties_builder = IndexedPropertyBuilder::new(); + + Ok(BaseLayerFileBuilderPhase2 { + files, + triple_builder, + indexed_properties_builder, + }) } /// Add the given subject, predicate and object. 
@@ -357,7 +373,9 @@ impl BaseLayerFileBuilderPhase2 { predicate: u64, object: u64, ) -> io::Result<()> { - self.builder.add_triple(subject, predicate, object).await + self.triple_builder + .add_triple(subject, predicate, object) + .await } /// Add the given triple. @@ -370,18 +388,23 @@ impl BaseLayerFileBuilderPhase2 { where ::IntoIter: Unpin + Send, { - self.builder.add_id_triples(triples).await + self.triple_builder.add_id_triples(triples).await + } + + pub fn set_index_triple(&mut self, subject: u64, index: usize, object: u64) { + self.indexed_properties_builder.add(subject, index, object) } pub(crate) async fn partial_finalize(self) -> io::Result> { - self.builder.finalize().await?; + self.triple_builder.finalize().await?; + let indexed_properties_collection_bufs = self.indexed_properties_builder.finalize(); chrono_log!("finalized base triples builder"); Ok(self.files) } pub async fn finalize(self) -> io::Result<()> { - self.builder.finalize().await?; + self.triple_builder.finalize().await?; chrono_log!("finalized base triples builder"); let s_p_adjacency_list_files = self.files.s_p_adjacency_list_files.clone(); let sp_o_adjacency_list_files = self.files.sp_o_adjacency_list_files.clone(); diff --git a/src/storage/consts.rs b/src/storage/consts.rs index bae2d668..d2f3c15c 100644 --- a/src/storage/consts.rs +++ b/src/storage/consts.rs @@ -90,6 +90,13 @@ pub struct Filenames { pub parent: &'static str, pub rollup: &'static str, + + pub index_property_subjects: &'static str, + pub index_property_adjacency_list_nums: &'static str, + pub index_property_adjacency_list_bits: &'static str, + pub index_property_adjacency_list_bit_index_blocks: &'static str, + pub index_property_adjacency_list_bit_index_sblocks: &'static str, + pub index_property_objects: &'static str, } #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, FromPrimitive)] @@ -156,6 +163,13 @@ pub enum LayerFileEnum { Parent, Rollup, + + IndexPropertySubjects, + 
IndexPropertyAdjacencyListNums, + IndexPropertyAdjacencyListBits, + IndexPropertyAdjacencyListBitIndexBlocks, + IndexPropertyAdjacencyListBitIndexSBlocks, + IndexPropertyObjects, } pub const FILENAMES: Filenames = Filenames { @@ -254,6 +268,15 @@ pub const FILENAMES: Filenames = Filenames { parent: "parent.hex", rollup: "rollup.hex", + + index_property_subjects: "index_property_subject_nums.logarray", + index_property_adjacency_list_nums: "index_property_adjacency_list_nums.logarray", + index_property_adjacency_list_bits: "index_property_adjacency_list_bits.bitarray", + index_property_adjacency_list_bit_index_blocks: + "index_property_adjacency_list_bit_index_blocks.nums", + index_property_adjacency_list_bit_index_sblocks: + "index_property_adjacency_list_bit_index_sblocks.nums", + index_property_objects: "index_property_object_nums.logarray", }; lazy_static! { @@ -502,6 +525,30 @@ lazy_static! { ), ("parent.hex", LayerFileEnum::Parent), ("rollup.hex", LayerFileEnum::Rollup), + ( + "index_property_subject_nums.logarray", + LayerFileEnum::IndexPropertySubjects + ), + ( + "index_property_adjacency_list_nums.logarray", + LayerFileEnum::IndexPropertyAdjacencyListNums + ), + ( + "index_property_adjacency_list_bits.bitarray", + LayerFileEnum::IndexPropertyAdjacencyListBits + ), + ( + "index_property_adjacency_list_bit_index_blocks.nums", + LayerFileEnum::IndexPropertyAdjacencyListBitIndexBlocks + ), + ( + "index_property_adjacency_list_bit_index_sblocks.nums", + LayerFileEnum::IndexPropertyAdjacencyListBitIndexSBlocks + ), + ( + "index_property_object_nums.logarray", + LayerFileEnum::IndexPropertyObjects + ), ]); } diff --git a/src/storage/file.rs b/src/storage/file.rs index 78e85dee..ed9b4df5 100644 --- a/src/storage/file.rs +++ b/src/storage/file.rs @@ -7,7 +7,9 @@ use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use async_trait::async_trait; -use crate::structure::{AdjacencyList, BitIndex}; +use crate::structure::{ + indexed_property::IndexPropertyBuffers, 
AdjacencyList, AdjacencyListBuffers, BitIndex, +}; #[async_trait] pub trait SyncableFile: AsyncWrite + Unpin + Send { @@ -100,6 +102,7 @@ pub struct BaseLayerFiles { pub s_p_adjacency_list_files: AdjacencyListFiles, pub sp_o_adjacency_list_files: AdjacencyListFiles, + pub index_property_files: IndexPropertyFiles, pub o_ps_adjacency_list_files: AdjacencyListFiles, @@ -123,6 +126,7 @@ pub struct BaseLayerMaps { pub o_ps_adjacency_list_maps: AdjacencyListMaps, pub predicate_wavelet_tree_maps: BitIndexMaps, + pub index_property_maps: Option, } impl BaseLayerFiles { @@ -138,6 +142,7 @@ impl BaseLayerFiles { let s_p_adjacency_list_maps = self.s_p_adjacency_list_files.map_all().await?; let sp_o_adjacency_list_maps = self.sp_o_adjacency_list_files.map_all().await?; + let index_property_maps = self.index_property_files.map_all_if_exists().await?; let o_ps_adjacency_list_maps = self.o_ps_adjacency_list_files.map_all().await?; let predicate_wavelet_tree_maps = self.predicate_wavelet_tree_files.map_all().await?; @@ -154,8 +159,9 @@ impl BaseLayerFiles { s_p_adjacency_list_maps, sp_o_adjacency_list_maps, - o_ps_adjacency_list_maps, + index_property_maps, + o_ps_adjacency_list_maps, predicate_wavelet_tree_maps, }) } @@ -371,6 +377,54 @@ impl DictionaryFiles { } } +#[derive(Clone)] +pub struct IndexPropertyFiles { + pub subjects_logarray_file: F, + pub adjacency_files: AdjacencyListFiles, + pub objects_logarray_file: F, +} + +#[derive(Clone)] +pub struct IndexPropertyMaps { + pub subjects_logarray_map: Bytes, + pub adjacency_maps: AdjacencyListMaps, + pub objects_logarray_map: Bytes, +} + +impl IndexPropertyFiles { + pub async fn map_all_if_exists(&self) -> io::Result> { + if let Some(subjects_logarray_map) = self.subjects_logarray_file.map_if_exists().await? 
{ + Ok(Some(IndexPropertyMaps { + subjects_logarray_map, + adjacency_maps: self.adjacency_files.map_all().await?, + objects_logarray_map: self.objects_logarray_file.map().await?, + })) + } else { + Ok(None) + } + } +} + +// a little silly +impl Into for IndexPropertyMaps { + fn into(self) -> IndexPropertyBuffers { + IndexPropertyBuffers { + subjects_logarray_buf: self.subjects_logarray_map, + adjacency_bufs: self.adjacency_maps.into(), + objects_logarray_buf: self.objects_logarray_map, + } + } +} +impl From for IndexPropertyMaps { + fn from(value: IndexPropertyBuffers) -> Self { + Self { + subjects_logarray_map: value.subjects_logarray_buf, + adjacency_maps: value.adjacency_bufs.into(), + objects_logarray_map: value.objects_logarray_buf, + } + } +} + #[derive(Clone)] pub struct IdMapMaps { pub node_value_idmap_maps: Option, @@ -454,6 +508,30 @@ impl Into for AdjacencyListMaps { } } +impl Into for AdjacencyListMaps { + fn into(self) -> AdjacencyListBuffers { + AdjacencyListBuffers { + nums: self.nums_map, + bits: self.bitindex_maps.bits_map, + bitindex_blocks: self.bitindex_maps.blocks_map, + bitindex_sblocks: self.bitindex_maps.sblocks_map, + } + } +} + +impl From for AdjacencyListMaps { + fn from(value: AdjacencyListBuffers) -> Self { + Self { + nums_map: value.nums, + bitindex_maps: BitIndexMaps { + bits_map: value.bits, + blocks_map: value.bitindex_blocks, + sblocks_map: value.bitindex_sblocks, + }, + } + } +} + #[derive(Clone)] pub struct AdjacencyListFiles { pub bitindex_files: BitIndexFiles, diff --git a/src/storage/layer.rs b/src/storage/layer.rs index fcae9f80..f0728fe7 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -427,6 +427,12 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { FILENAMES.base_predicate_wavelet_tree_bits, FILENAMES.base_predicate_wavelet_tree_bit_index_blocks, FILENAMES.base_predicate_wavelet_tree_bit_index_sblocks, + FILENAMES.index_property_subjects, + FILENAMES.index_property_adjacency_list_nums, + 
FILENAMES.index_property_adjacency_list_bits, + FILENAMES.index_property_adjacency_list_bit_index_blocks, + FILENAMES.index_property_adjacency_list_bit_index_sblocks, + FILENAMES.index_property_objects, ]; let mut files = Vec::with_capacity(filenames.len()); @@ -496,6 +502,18 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { blocks_file: files[29].clone(), sblocks_file: files[30].clone(), }, + index_property_files: IndexPropertyFiles { + subjects_logarray_file: files[31].clone(), + adjacency_files: AdjacencyListFiles { + nums_file: files[32].clone(), + bitindex_files: BitIndexFiles { + bits_file: files[33].clone(), + blocks_file: files[34].clone(), + sblocks_file: files[35].clone(), + }, + }, + objects_logarray_file: files[36].clone(), + }, }) } diff --git a/src/structure/adjacencylist.rs b/src/structure/adjacencylist.rs index 6b2abd83..8b4559c1 100644 --- a/src/structure/adjacencylist.rs +++ b/src/structure/adjacencylist.rs @@ -441,10 +441,10 @@ impl AdjacencyListBufBuilder { #[derive(Clone)] pub struct AdjacencyListBuffers { - nums: Bytes, - bits: Bytes, - bitindex_blocks: Bytes, - bitindex_sblocks: Bytes, + pub nums: Bytes, + pub bits: Bytes, + pub bitindex_blocks: Bytes, + pub bitindex_sblocks: Bytes, } pub struct AdjacencyListBuilder diff --git a/src/structure/indexed_property.rs b/src/structure/indexed_property.rs index b4a44f9e..ff46bcaf 100644 --- a/src/structure/indexed_property.rs +++ b/src/structure/indexed_property.rs @@ -5,11 +5,14 @@ use super::{ LogArrayBufBuilder, MonotonicLogArray, }; +// TODO this is a mirror of the file maps we define in file.rs. +// It does make more sense to have them here, as they are not file dependent. +// But we should probably eliminate some of those maps and instead map to types defined with the structures.
#[derive(Clone)] pub struct IndexPropertyBuffers { - subjects_logarray_buf: Bytes, - adjacency_bufs: AdjacencyListBuffers, - objects_logarray_buf: Bytes, + pub subjects_logarray_buf: Bytes, + pub adjacency_bufs: AdjacencyListBuffers, + pub objects_logarray_buf: Bytes, } pub struct IndexedPropertyBuilder { @@ -75,6 +78,7 @@ impl IndexedPropertyBuilder { } } +#[derive(Clone)] pub struct IndexedPropertyCollection { subjects: MonotonicLogArray, adjacencies: AdjacencyList,