diff --git a/core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java b/core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java index ce78375c4b1a..d916fc46e94b 100644 --- a/core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java +++ b/core/src/main/java/org/apache/iceberg/puffin/StandardBlobTypes.java @@ -27,6 +27,12 @@ private StandardBlobTypes() {} */ public static final String APACHE_DATASKETCHES_THETA_V1 = "apache-datasketches-theta-v1"; + /** + * A serialized form of a KLL sketch, a very compact quantiles sketch, produced by the Apache DataSketches library. + */ + public static final String APACHE_DATASKETCHES_KLL_SKETCH = "apache-datasketches-kll-v1"; + /** A serialized deletion vector according to the Iceberg spec */ public static final String DV_V1 = "deletion-vector-v1"; } diff --git a/format/puffin-spec.md b/format/puffin-spec.md index 62e8ae085398..da28c4c497bd 100644 --- a/format/puffin-spec.md +++ b/format/puffin-spec.md @@ -181,6 +181,15 @@ for Puffin v1. [roaring-bitmap-portable-serialization]: https://github.com/RoaringBitmap/RoaringFormatSpec?tab=readme-ov-file#extension-for-64-bit-implementations [roaring-bitmap-general-layout]: https://github.com/RoaringBitmap/RoaringFormatSpec?tab=readme-ov-file#general-layout +#### `apache-datasketches-kll-v1` blob type + +A serialized form of a KLL sketch, a very compact quantiles sketch, produced by the +[Apache DataSketches](https://datasketches.apache.org/) library. +The KLL quantiles sketch is a mergeable streaming algorithm that estimates +the distribution of values and approximately answers queries about the rank of a value, +the probability mass function (PMF) or histogram of the distribution, +the cumulative distribution function (CDF), and quantiles (median, min, max, 95th percentile, and so on). + ### Compression codecs The data can also be uncompressed. If it is compressed the codec should be one of