Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions rust/lance-index/src/scalar/bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ use crate::{metrics::MetricsCollector, Index, IndexType};
use crate::{scalar::expression::ScalarQueryParser, scalar::IndexReader};

pub const BITMAP_LOOKUP_NAME: &str = "bitmap_page_lookup.lance";
pub const INDEX_STATS_METADATA_KEY: &str = "lance:index_stats";

const MAX_BITMAP_ARRAY_LENGTH: usize = i32::MAX as usize - 1024 * 1024; // leave headroom

Expand Down Expand Up @@ -620,6 +621,7 @@ impl BitmapIndexPlugin {
index_store: &dyn IndexStore,
value_type: &DataType,
) -> Result<()> {
let num_bitmaps = state.len();
let schema = Arc::new(Schema::new(vec![
Field::new("keys", value_type.clone(), true),
Field::new("bitmaps", DataType::Binary, true),
Expand Down Expand Up @@ -672,8 +674,17 @@ impl BitmapIndexPlugin {
bitmap_index_file.write_record_batch(record_batch).await?;
}

// Finish file once at the end - this creates the file even if we wrote no batches
bitmap_index_file.finish().await?;
// Finish file with metadata that allows lightweight statistics reads
let stats_json = serde_json::to_string(&BitmapStatistics { num_bitmaps }).map_err(|e| {
Error::Internal {
message: format!("failed to serialize bitmap statistics: {e}"),
location: location!(),
}
})?;
let mut metadata = HashMap::new();
metadata.insert(INDEX_STATS_METADATA_KEY.to_string(), stats_json);

bitmap_index_file.finish_with_metadata(metadata).await?;

Ok(())
}
Expand Down Expand Up @@ -738,6 +749,10 @@ impl ScalarIndexPlugin for BitmapIndexPlugin {
true
}

fn index_type(&self) -> IndexType {
IndexType::Bitmap
}
Comment thread
Xuanwo marked this conversation as resolved.
Outdated

fn version(&self) -> u32 {
BITMAP_INDEX_VERSION
}
Expand Down Expand Up @@ -782,6 +797,23 @@ impl ScalarIndexPlugin for BitmapIndexPlugin {
) -> Result<Arc<dyn ScalarIndex>> {
Ok(BitmapIndex::load(index_store, frag_reuse_index, cache).await? as Arc<dyn ScalarIndex>)
}

async fn load_statistics(
&self,
index_store: Arc<dyn IndexStore>,
_index_details: &prost_types::Any,
) -> Result<Option<serde_json::Value>> {
let reader = index_store.open_index_file(BITMAP_LOOKUP_NAME).await?;
if let Some(value) = reader.schema().metadata.get(INDEX_STATS_METADATA_KEY) {
let stats = serde_json::from_str(value).map_err(|e| Error::Internal {
message: format!("failed to parse bitmap statistics metadata: {e}"),
location: location!(),
})?;
Ok(Some(stats))
} else {
Ok(None)
}
}
}

#[cfg(test)]
Expand All @@ -790,11 +822,12 @@ pub mod tests {
use crate::metrics::NoOpMetricsCollector;
use crate::scalar::lance_format::LanceIndexStore;
use arrow_array::{RecordBatch, StringArray, UInt64Array};
use arrow_schema::{Field, Schema};
use arrow_schema::{DataType, Field, Schema};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
use futures::stream;
use lance_core::utils::{address::RowAddress, tempfile::TempObjDir};
use lance_io::object_store::ObjectStore;
use std::collections::HashMap;

#[tokio::test]
async fn test_bitmap_lazy_loading_and_cache() {
Expand Down
12 changes: 12 additions & 0 deletions rust/lance-index/src/scalar/bloomfilter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,10 @@ impl ScalarIndexPlugin for BloomFilterIndexPlugin {
false
}

fn index_type(&self) -> IndexType {
IndexType::BloomFilter
}

fn version(&self) -> u32 {
BLOOMFILTER_INDEX_VERSION
}
Expand All @@ -1309,6 +1313,14 @@ impl ScalarIndexPlugin for BloomFilterIndexPlugin {
as Arc<dyn ScalarIndex>,
)
}

async fn load_statistics(
&self,
_index_store: Arc<dyn IndexStore>,
_index_details: &prost_types::Any,
) -> Result<Option<serde_json::Value>> {
Ok(None)
}
}

#[derive(Debug)]
Expand Down
4 changes: 4 additions & 0 deletions rust/lance-index/src/scalar/btree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1939,6 +1939,10 @@ impl ScalarIndexPlugin for BTreeIndexPlugin {
true
}

fn index_type(&self) -> IndexType {
IndexType::BTree
}

fn version(&self) -> u32 {
BTREE_INDEX_VERSION
}
Expand Down
5 changes: 5 additions & 0 deletions rust/lance-index/src/scalar/inverted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use crate::{
registry::{ScalarIndexPlugin, TrainingCriteria, TrainingOrdering, TrainingRequest},
CreatedIndex, ScalarIndex,
},
IndexType,
};

use super::IndexStore;
Expand Down Expand Up @@ -137,6 +138,10 @@ impl ScalarIndexPlugin for InvertedIndexPlugin {
false
}

fn index_type(&self) -> IndexType {
IndexType::Inverted
}

fn version(&self) -> u32 {
INVERTED_INDEX_VERSION
}
Expand Down
4 changes: 4 additions & 0 deletions rust/lance-index/src/scalar/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,10 @@ impl ScalarIndexPlugin for JsonIndexPlugin {
true
}

fn index_type(&self) -> IndexType {
IndexType::Scalar
}

fn attach_registry(&self, registry: Arc<IndexPluginRegistry>) {
let mut reg_ref = self.registry.lock().unwrap();
*reg_ref = Some(registry);
Expand Down
4 changes: 4 additions & 0 deletions rust/lance-index/src/scalar/label_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,10 @@ impl ScalarIndexPlugin for LabelListIndexPlugin {
true
}

fn index_type(&self) -> IndexType {
IndexType::LabelList
}

fn version(&self) -> u32 {
LABEL_LIST_INDEX_VERSION
}
Expand Down
4 changes: 4 additions & 0 deletions rust/lance-index/src/scalar/ngram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1278,6 +1278,10 @@ impl ScalarIndexPlugin for NGramIndexPlugin {
false
}

fn index_type(&self) -> IndexType {
IndexType::NGram
}

fn version(&self) -> u32 {
NGRAM_INDEX_VERSION
}
Expand Down
13 changes: 13 additions & 0 deletions rust/lance-index/src/scalar/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::registry::IndexPluginRegistry;
use crate::{
frag_reuse::FragReuseIndex,
scalar::{expression::ScalarQueryParser, CreatedIndex, IndexStore, ScalarIndex},
IndexType,
};

pub const VALUE_COLUMN_NAME: &str = "value";
Expand Down Expand Up @@ -129,6 +130,9 @@ pub trait ScalarIndexPlugin: Send + Sync + std::fmt::Debug {
/// Returns true if the index returns an exact answer (e.g. not AtMost)
fn provides_exact_answer(&self) -> bool;

/// Returns the index type for this plugin
fn index_type(&self) -> IndexType;
Comment thread
Xuanwo marked this conversation as resolved.
Outdated

/// The version of the index plugin
///
/// We assume that indexes are not forwards compatible. If an index was written with a
Expand Down Expand Up @@ -156,6 +160,15 @@ pub trait ScalarIndexPlugin: Send + Sync + std::fmt::Debug {
cache: &LanceCache,
) -> Result<Arc<dyn ScalarIndex>>;

/// Optional hook allowing a plugin to provide statistics without loading the index.
async fn load_statistics(
&self,
_index_store: Arc<dyn IndexStore>,
_index_details: &prost_types::Any,
) -> Result<Option<serde_json::Value>> {
Ok(None)
}

/// Optional hook that plugins can use if they need to be aware of the registry
fn attach_registry(&self, _registry: Arc<IndexPluginRegistry>) {}

Expand Down
4 changes: 4 additions & 0 deletions rust/lance-index/src/scalar/zonemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,10 @@ impl ScalarIndexPlugin for ZoneMapIndexPlugin {
false
}

fn index_type(&self) -> IndexType {
IndexType::ZoneMap
}

fn version(&self) -> u32 {
ZONEMAP_INDEX_VERSION
}
Expand Down
Loading
Loading