-
Notifications
You must be signed in to change notification settings - Fork 147
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEAT] Approximate quantile aggregation (pulled into main) (#2179)
Puts the finishing touches on #2076 --------- Co-authored-by: Maxime Petitjean <[email protected]> Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
- Loading branch information
1 parent
2e90b70
commit 99a0ac0
Showing
30 changed files
with
1,021 additions
and
26 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
use super::as_arrow::AsArrow; | ||
use super::from_arrow::FromArrow; | ||
use super::DaftApproxSketchAggable; | ||
use crate::array::ops::GroupIndices; | ||
use crate::{array::StructArray, datatypes::*}; | ||
use arrow2::array::Array; | ||
use common_error::DaftResult; | ||
use sketches_ddsketch::{Config, DDSketch}; | ||
|
||
impl DaftApproxSketchAggable for &DataArray<Float64Type> { | ||
type Output = DaftResult<StructArray>; | ||
|
||
fn approx_sketch(&self) -> Self::Output { | ||
let primitive_arr = self.as_arrow(); | ||
let arrow_array = if primitive_arr.is_empty() { | ||
daft_sketch::into_arrow2(vec![]) | ||
} else if primitive_arr.null_count() > 0 { | ||
let sketch = primitive_arr | ||
.iter() | ||
.fold(None, |acc, value| match (acc, value) { | ||
(acc, None) => acc, | ||
(None, Some(v)) => { | ||
let mut sketch = DDSketch::new(Config::defaults()); | ||
sketch.add(*v); | ||
Some(sketch) | ||
} | ||
(Some(mut acc), Some(v)) => { | ||
acc.add(*v); | ||
Some(acc) | ||
} | ||
}); | ||
daft_sketch::into_arrow2(vec![sketch]) | ||
} else { | ||
let sketch = primitive_arr.values_iter().fold( | ||
DDSketch::new(Config::defaults()), | ||
|mut acc, value| { | ||
acc.add(*value); | ||
acc | ||
}, | ||
); | ||
|
||
daft_sketch::into_arrow2(vec![Some(sketch)]) | ||
}; | ||
|
||
StructArray::from_arrow( | ||
Field::new( | ||
&self.field.name, | ||
DataType::from(&*daft_sketch::ARROW2_DDSKETCH_DTYPE), | ||
) | ||
.into(), | ||
arrow_array, | ||
) | ||
} | ||
|
||
fn grouped_approx_sketch(&self, groups: &GroupIndices) -> Self::Output { | ||
let arrow_array = self.as_arrow(); | ||
let sketch_per_group = if arrow_array.is_empty() { | ||
daft_sketch::into_arrow2(vec![]) | ||
} else if arrow_array.null_count() > 0 { | ||
let sketches: Vec<Option<DDSketch>> = groups | ||
.iter() | ||
.map(|g| { | ||
g.iter().fold(None, |acc, index| { | ||
let idx = *index as usize; | ||
match (acc, arrow_array.is_null(idx)) { | ||
(acc, true) => acc, | ||
(None, false) => { | ||
let mut sketch = DDSketch::new(Config::defaults()); | ||
sketch.add(arrow_array.value(idx)); | ||
Some(sketch) | ||
} | ||
(Some(mut acc), false) => { | ||
acc.add(arrow_array.value(idx)); | ||
Some(acc) | ||
} | ||
} | ||
}) | ||
}) | ||
.collect(); | ||
|
||
daft_sketch::into_arrow2(sketches) | ||
} else { | ||
let sketches = groups | ||
.iter() | ||
.map(|g| { | ||
Some( | ||
g.iter() | ||
.fold(DDSketch::new(Config::defaults()), |mut acc, index| { | ||
let idx = *index as usize; | ||
acc.add(arrow_array.value(idx)); | ||
acc | ||
}), | ||
) | ||
}) | ||
.collect(); | ||
|
||
daft_sketch::into_arrow2(sketches) | ||
}; | ||
|
||
StructArray::from_arrow( | ||
Field::new( | ||
&self.field.name, | ||
DataType::from(&*daft_sketch::ARROW2_DDSKETCH_DTYPE), | ||
) | ||
.into(), | ||
sketch_per_group, | ||
) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
use super::from_arrow::FromArrow; | ||
use super::DaftMergeSketchAggable; | ||
use crate::array::ops::GroupIndices; | ||
use crate::{array::StructArray, datatypes::*}; | ||
use common_error::{DaftError, DaftResult}; | ||
|
||
impl DaftMergeSketchAggable for &StructArray { | ||
type Output = DaftResult<StructArray>; | ||
|
||
fn merge_sketch(&self) -> Self::Output { | ||
let sketches_array = daft_sketch::from_arrow2(self.to_arrow())?; | ||
let sketch = | ||
sketches_array | ||
.into_iter() | ||
.try_fold(None, |acc, value| match (acc, value) { | ||
(acc, None) => Ok::<_, DaftError>(acc), | ||
(None, Some(v)) => Ok(Some(v)), | ||
(Some(mut acc), Some(v)) => { | ||
acc.merge(&v).map_err(|err| { | ||
DaftError::ComputeError(format!("Error merging sketches: {}", err)) | ||
})?; | ||
Ok(Some(acc)) | ||
} | ||
})?; | ||
let arrow_array = daft_sketch::into_arrow2(vec![sketch]); | ||
|
||
StructArray::from_arrow( | ||
Field::new( | ||
&self.field.name, | ||
DataType::from(&*daft_sketch::ARROW2_DDSKETCH_DTYPE), | ||
) | ||
.into(), | ||
arrow_array, | ||
) | ||
} | ||
|
||
fn grouped_merge_sketch(&self, groups: &GroupIndices) -> Self::Output { | ||
let sketches_array = daft_sketch::from_arrow2(self.to_arrow())?; | ||
|
||
let sketch_per_group = groups | ||
.iter() | ||
.map(|g| { | ||
g.iter().try_fold(None, |acc, index| { | ||
let idx = *index as usize; | ||
match (acc, sketches_array[idx].is_none()) { | ||
(acc, true) => Ok::<_, DaftError>(acc), | ||
(None, false) => Ok(sketches_array[idx].clone()), | ||
(Some(mut acc), false) => { | ||
acc.merge(sketches_array[idx].as_ref().unwrap()) | ||
.map_err(|err| { | ||
DaftError::ComputeError(format!( | ||
"Error merging sketches: {}", | ||
err | ||
)) | ||
})?; | ||
Ok(Some(acc)) | ||
} | ||
} | ||
}) | ||
}) | ||
.collect::<DaftResult<Vec<_>>>()?; | ||
|
||
let arrow_array = daft_sketch::into_arrow2(sketch_per_group); | ||
|
||
StructArray::from_arrow( | ||
Field::new( | ||
&self.field.name, | ||
DataType::from(&*daft_sketch::ARROW2_DDSKETCH_DTYPE), | ||
) | ||
.into(), | ||
arrow_array, | ||
) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.