Skip to content

Commit

Permalink
fix: add rustdoc comments + move examples to root
Browse files Browse the repository at this point in the history
  • Loading branch information
Max-Meldrum committed Aug 20, 2024
1 parent 42290e9 commit 954d727
Show file tree
Hide file tree
Showing 8 changed files with 389 additions and 116 deletions.
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[workspace]
resolver = "2"
members = ["benchmarks/nyc_taxi_bench", "datafusion-uwheel"]
members = ["benchmarks/nyc_taxi_bench", "datafusion-uwheel", "examples/*"]

[workspace.package]
version = "40.0.0"
Expand All @@ -19,7 +19,9 @@ uwheel = { version = "0.2.0", default-features = false, features = [
"max",
"all",
] }
datafusion-uwheel = { path = "datafusion-uwheel", version = "40.0.0" }
datafusion = "40.0.0"
chrono = "0.4.38"
bitpacking = "0.9.2"
tokio = "1.38.1"
human_bytes = "0.4.3"
4 changes: 2 additions & 2 deletions benchmarks/nyc_taxi_bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ async fn main() -> Result<()> {
.build_index(
IndexBuilder::with_col_and_aggregate(
"fare_amount",
datafusion_uwheel::AggregateType::Sum,
datafusion_uwheel::UWheelAggregate::Sum,
)
.with_time_range(
ScalarValue::Utf8(Some("2022-01-01T00:00:00Z".to_string())),
Expand All @@ -108,7 +108,7 @@ async fn main() -> Result<()> {
.build_index(
IndexBuilder::with_col_and_aggregate(
"fare_amount",
datafusion_uwheel::AggregateType::Sum,
datafusion_uwheel::UWheelAggregate::Sum,
)
.with_filter(col("passenger_count").eq(lit(3.0)))
.with_time_range(
Expand Down
161 changes: 150 additions & 11 deletions datafusion-uwheel/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,54 @@ use std::sync::Arc;
use uwheel::{wheels::read::aggregation::conf::WheelMode, HawConf};

/// Builder for creating a UWheelOptimizer
#[allow(dead_code)]
///
/// This struct provides an interface for configuring and creating a UWheelOptimizer.
///
/// # Examples
///
/// ```
/// use datafusion_uwheel::builder::Builder;
/// use datafusion::prelude::*;
/// use datafusion::common::ScalarValue;
/// use datafusion::common::arrow::datatypes::{Schema, DataType, Field};
/// use datafusion::common::arrow::array::{RecordBatch, Date32Array, Float32Array};
/// use datafusion::datasource::MemTable;
/// use std::sync::Arc;
///
/// async fn create_mem_table() -> MemTable {
/// let schema = Arc::new(Schema::new(vec![
/// Field::new("timestamp", DataType::Date32, false),
/// Field::new("temperature", DataType::Float32, false),
/// Field::new("humidity", DataType::Float32, false),
/// ]));
///
/// let data = RecordBatch::try_new(
/// schema.clone(),
/// vec![
/// Arc::new(Date32Array::from(vec![19000, 19001, 19002])),
/// Arc::new(Float32Array::from(vec![20.5, 21.0, 22.3])),
/// Arc::new(Float32Array::from(vec![0.5, 0.6, 0.7])),
/// ],
/// ).unwrap();
///
/// MemTable::try_new(schema, vec![vec![data]]).unwrap()
/// }
///
/// async fn create_optimizer() {
/// let mem_table = create_mem_table().await;
/// let optimizer = Builder::new("timestamp")
/// .with_name("my_table")
/// .with_min_max_wheels(vec!["temperature", "humidity"])
/// .with_time_range(
/// ScalarValue::Date32(Some(19000)),
/// ScalarValue::Date32(Some(19100))
/// )
/// .unwrap()
/// .build_with_provider(Arc::new(mem_table))
/// .await
/// .unwrap();
/// }
/// ```
pub struct Builder {
/// Name of the table
name: String,
Expand All @@ -24,6 +71,18 @@ pub struct Builder {

impl Builder {
/// Create a new UWheelOptimizer builder
///
/// # Arguments
///
/// * `time_column` - The name of the column that represents time in the dataset
///
/// # Examples
///
/// ```
/// use datafusion_uwheel::builder::Builder;
///
/// let builder = Builder::new("timestamp");
/// ```
pub fn new(time_column: impl Into<String>) -> Self {
Self {
name: "".to_string(),
Expand All @@ -33,44 +92,88 @@ impl Builder {
time_range: None,
}
}
// helper method to create a default Haw configuration

/// Create a default Haw configuration
///
/// This method sets up a HawConf with Index mode and Keep retention policy for all time dimensions.
fn default_haw_conf() -> HawConf {
// configure Index mode
let mut conf = HawConf::default().with_mode(WheelMode::Index);
// set the retention policy to keep all data on all time dimensions

conf.seconds
.set_retention_policy(uwheel::RetentionPolicy::Keep);

conf.minutes
.set_retention_policy(uwheel::RetentionPolicy::Keep);

conf.hours
.set_retention_policy(uwheel::RetentionPolicy::Keep);

conf.days
.set_retention_policy(uwheel::RetentionPolicy::Keep);

conf.weeks
.set_retention_policy(uwheel::RetentionPolicy::Keep);
conf
}

/// Set the name of the table
///
/// # Arguments
///
/// * `name` - The name to be assigned to the table
///
/// # Examples
///
/// ```
/// use datafusion_uwheel::builder::Builder;
///
/// let builder = Builder::new("timestamp").with_name("my_table");
/// ```
pub fn with_name(mut self, name: impl Into<String>) -> Self {
self.name = name.into();
self
}

/// Set the Haw configuration to use when building wheels
///
/// # Arguments
///
/// * `conf` - The HawConf to be used
///
/// # Examples
///
/// ```
/// use datafusion_uwheel::builder::Builder;
/// use uwheel::HawConf;
///
/// let custom_conf = HawConf::default();
/// let builder = Builder::new("timestamp").with_haw_conf(custom_conf);
/// ```
pub fn with_haw_conf(mut self, conf: HawConf) -> Self {
self.wheel_conf = conf;
self
}

/// Applies a time range when building the index
///
/// Input must be a `ScalarValue` of type `Date32`, `Date64` or `Timestamp`.
///
/// # Arguments
///
/// * `start` - The start of the time range (inclusive)
/// * `end` - The end of the time range (inclusive)
///
/// # Returns
///
/// * `Result<Self, DataFusionError>` - Ok if the time range is valid, Err otherwise
///
/// # Examples
///
/// ```
/// use datafusion_uwheel::builder::Builder;
/// use datafusion::prelude::*;
/// use datafusion::common::ScalarValue;
///
/// let builder = Builder::new("timestamp")
/// .with_time_range(
/// ScalarValue::Date32(Some(19000)),
/// ScalarValue::Date32(Some(19100))
/// )
/// .unwrap();
/// ```
pub fn with_time_range(
mut self,
start: ScalarValue,
Expand All @@ -89,13 +192,49 @@ impl Builder {

/// Select the columns to build min/max wheels for
///
/// Columns must be of numeric data types.
///
/// # Arguments
///
/// * `columns` - A vector of column names to build min/max wheels for; it replaces
///   any column list set by an earlier call
///
/// # Examples
///
/// ```
/// use datafusion_uwheel::builder::Builder;
///
/// let builder = Builder::new("timestamp")
///     .with_min_max_wheels(vec!["temperature", "humidity"]);
/// ```
pub fn with_min_max_wheels(mut self, columns: Vec<&str>) -> Self {
    // Consume the borrowed names and store owned copies on the builder.
    let owned_names = columns.into_iter().map(str::to_owned).collect();
    self.min_max_columns = owned_names;
    self
}

/// Builds the UWheelOptimizer using the provided TableProvider
///
/// # Arguments
///
/// * `provider` - The TableProvider to build the UWheelOptimizer from
///
/// # Returns
///
/// * `Result<UWheelOptimizer>` - The built UWheelOptimizer if successful
///
/// # Examples
///
/// ```
/// use datafusion_uwheel::builder::Builder;
/// use std::sync::Arc;
/// use datafusion::datasource::TableProvider;
/// use datafusion::error::Result;
/// use datafusion_uwheel::UWheelOptimizer;
///
/// async fn build_optimizer(provider: Arc<dyn TableProvider>) -> Result<UWheelOptimizer> {
/// Builder::new("timestamp")
/// .with_name("my_table")
/// .build_with_provider(provider)
/// .await
/// }
/// ```
pub async fn build_with_provider(
self,
provider: Arc<dyn TableProvider>,
Expand Down
Loading

0 comments on commit 954d727

Please sign in to comment.