diff --git a/docs/changelog/140478.yaml b/docs/changelog/140478.yaml new file mode 100644 index 0000000000000..8e9f77e377336 --- /dev/null +++ b/docs/changelog/140478.yaml @@ -0,0 +1,5 @@ +pr: 140478 +summary: T digest field type docs +area: Mapping +type: enhancement +issues: [] diff --git a/docs/reference/elasticsearch/mapping-reference/exponential-histogram.md b/docs/reference/elasticsearch/mapping-reference/exponential-histogram.md index 903cbaaad9d98..48a9250463207 100644 --- a/docs/reference/elasticsearch/mapping-reference/exponential-histogram.md +++ b/docs/reference/elasticsearch/mapping-reference/exponential-histogram.md @@ -59,10 +59,10 @@ Optionally, you can include precomputed summary statistics: When `sum`, `min`, or `max` are omitted, Elasticsearch will estimate these values during indexing. If the histogram is empty (no positive/negative buckets and zero count is `0`), then `sum` must be `0.0` or omitted, and `min` and `max` must be omitted or `null`. -::::{important} +## Limitations + - An `exponential_histogram` field is single-valued: one histogram per field per document. Nested arrays are not supported. - `exponential_histogram` fields are not searchable and do not support sorting. -:::: ## Use cases [exponential-histogram-use-cases] diff --git a/docs/reference/elasticsearch/mapping-reference/field-data-types.md b/docs/reference/elasticsearch/mapping-reference/field-data-types.md index 17a98684f2b7b..c2dc570201d77 100644 --- a/docs/reference/elasticsearch/mapping-reference/field-data-types.md +++ b/docs/reference/elasticsearch/mapping-reference/field-data-types.md @@ -80,6 +80,9 @@ Dates [`exponential_histogram`](/reference/elasticsearch/mapping-reference/exponential-histogram.md) : Pre-aggregated numerical values in the form of an exponential histogram. 
+[`tdigest`](/reference/elasticsearch/mapping-reference/t-digest.md) {applies_to}`stack: preview 9.3+` {applies_to}`serverless: preview` +: Pre-aggregated numerical values in the form of a T-Digest. + ### Text search types [text-search-types] diff --git a/docs/reference/elasticsearch/mapping-reference/t-digest.md b/docs/reference/elasticsearch/mapping-reference/t-digest.md new file mode 100644 index 0000000000000..410c495f981f1 --- /dev/null +++ b/docs/reference/elasticsearch/mapping-reference/t-digest.md @@ -0,0 +1,123 @@ +--- +applies_to: + stack: preview 9.3 + serverless: preview +navigation_title: "T-digest" +--- + +# T-digest field type [tdigest] + +A field to store pre-aggregated numerical data constructed using the [T-Digest](/reference/aggregations/search-aggregations-metrics-percentile-aggregation.md) algorithm. + +## Structure of a `tdigest` field + +A `tdigest` field requires two arrays: + +* A `centroids` array of + [`double`](/reference/elasticsearch/mapping-reference/number.md), containing + the computed centroids. These must be provided in ascending order. +* A `counts` array of + [`long`](/reference/elasticsearch/mapping-reference/number.md), containing the + computed counts for each of the centroids. This must be the same length as + the `centroids` array. + +The field also accepts three optional summary fields: + +* `sum`, a [`double`](/reference/elasticsearch/mapping-reference/number.md), + representing the sum of the values being summarized by the t-digest. +* `min`, a [`double`](/reference/elasticsearch/mapping-reference/number.md), + representing the minimum of the values being summarized by the t-digest. +* `max`, a [`double`](/reference/elasticsearch/mapping-reference/number.md), + representing the maximum of the values being summarized by the t-digest. + +Specifying the summary values lets you supply more accurate statistics +calculated from the raw data. 
If not specified, Elasticsearch +computes them based on the given `centroids` and `counts`, with some loss of +accuracy. + +## Limitations + +* A `tdigest` field can only store a single sketch per document. Multiple values and nested arrays are not supported. +* `tdigest` fields do not support sorting and are not searchable. + + +## Configuring T-Digest Fields + +T-Digest fields accept two field-specific configuration parameters: + +* `compression`, a + [`double`](/reference/elasticsearch/mapping-reference/number.md) between `0` and + `10000` (excluding `0`), which corresponds to the parameter of the same name in + the [T-Digest](/reference/aggregations/search-aggregations-metrics-percentile-aggregation.md) algorithm. + In general, the higher this number, the more space on disk the field will use + but the more accurate the sketch approximations will be. The default is `100`. +* `digest_type`, which selects the merge strategy to use with the sketch. Valid + values are `default` and `high_accuracy`. The default is `default`. The + `default` type is optimized for storage and performance, while still producing a + good approximation. The `high_accuracy` variant uses more memory, disk, and + CPU for a better approximation. + +## Use cases [tdigest-use-cases] + +`tdigest` fields are primarily intended for use with aggregations. To make them +efficient for aggregations, the data are stored as compact [doc +values](/reference/elasticsearch/mapping-reference/doc-values.md) and not +indexed. 
+ +`tdigest` fields are supported in the following [ES|QL](/reference/query-languages/esql.md) aggregation functions: + +* [Avg](/reference/query-languages/esql/functions-operators/aggregation-functions.md#esql-avg) +* [Max](/reference/query-languages/esql/functions-operators/aggregation-functions.md#esql-max) + and + [Min](/reference/query-languages/esql/functions-operators/aggregation-functions.md#esql-min) +* [Percentile](/reference/query-languages/esql/functions-operators/aggregation-functions.md#esql-percentile) +* [Present](/reference/query-languages/esql/functions-operators/aggregation-functions.md#esql-present) and + [Absent](/reference/query-languages/esql/functions-operators/aggregation-functions.md#esql-absent) + + +## Synthetic `_source` [tdigest-synthetic-source] + +`tdigest` fields support [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source) in their default configuration. + +::::{note} +To save space, centroids with a count of zero are not stored in `tdigest` doc values. If you index a `tdigest` field that contains zero-count centroids and synthetic `_source` is enabled, those centroids won't appear when you retrieve the field. 
+:::: + +## Examples + +### Create an index with a `tdigest` field + +```console +PUT my-index-000001 +{ + "mappings": { + "properties": { + "latency": { + "type": "tdigest" + } + } + } +} +``` + +### Index a simple document + +```console +PUT my-index-000001/_doc/1 +{ + "latency": { + "centroids": [0.1, 0.2, 0.3, 0.4, 0.5], + "counts": [3, 7, 23, 12, 6] + } +} +``` + +### Query via ES|QL + +```console +POST /_query?format=txt +{ + "query": "FROM my-index-000001 | STATS PERCENTILE(latency, 99)" +} +``` + diff --git a/docs/reference/elasticsearch/toc.yml b/docs/reference/elasticsearch/toc.yml index b7744af86d017..5fae1f0831985 100644 --- a/docs/reference/elasticsearch/toc.yml +++ b/docs/reference/elasticsearch/toc.yml @@ -159,6 +159,7 @@ toc: - file: mapping-reference/geo-shape.md - file: mapping-reference/histogram.md - file: mapping-reference/exponential-histogram.md + - file: mapping-reference/t-digest.md - file: mapping-reference/ip.md - file: mapping-reference/parent-join.md - file: mapping-reference/keyword.md diff --git a/docs/reference/query-languages/esql/limitations.md b/docs/reference/query-languages/esql/limitations.md index 9d9bff94f2d44..bdb07de5aa2bd 100644 --- a/docs/reference/query-languages/esql/limitations.md +++ b/docs/reference/query-languages/esql/limitations.md @@ -52,6 +52,7 @@ By default, an {{esql}} query returns up to 1,000 rows. 
You can increase the num * `gauge` * `aggregate_metric_double` * `exponential_histogram` {applies_to}`stack: preview 9.3+` {applies_to}`serverless: preview` + * `tdigest` {applies_to}`stack: preview 9.3+` {applies_to}`serverless: preview` ### Unsupported types [_unsupported_types] diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java index 039b22f9a95c6..c57c4e1bddc79 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapper.java @@ -51,7 +51,7 @@ import org.elasticsearch.script.field.DocValuesScriptFieldFactory; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.MultiValueMode; -import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.metrics.TDigestExecutionHint; import org.elasticsearch.search.sort.BucketedSort; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.tdigest.parsing.TDigestParser; @@ -89,7 +89,7 @@ public static class Builder extends FieldMapper.Builder { private final Parameter> meta = Parameter.metaParam(); private final Parameter> ignoreMalformed; - private final Parameter digestType; + private final Parameter digestType; private final Parameter compression; public Builder(String name, boolean ignoreMalformedByDefault) { @@ -104,8 +104,8 @@ public Builder(String name, boolean ignoreMalformedByDefault) { "digest_type", false, m -> toType(m).digestType, - TDigestState.Type.HYBRID, - TDigestState.Type.class + TDigestExecutionHint.DEFAULT, + TDigestExecutionHint.class ); this.compression = new Parameter<>( "compression", @@ -147,7 +147,7 @@ public TDigestFieldMapper build(MapperBuilderContext context) { private final 
Explicit ignoreMalformed; private final boolean ignoreMalformedByDefault; - private final TDigestState.Type digestType; + private final TDigestExecutionHint digestType; private final double compression; public TDigestFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, Builder builder) { @@ -163,7 +163,7 @@ public boolean ignoreMalformed() { return ignoreMalformed.value(); } - public TDigestState.Type digestType() { + public TDigestExecutionHint digestType() { return digestType; } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldBlockLoaderTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldBlockLoaderTests.java index f0b7615f5141e..19039162f0587 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldBlockLoaderTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldBlockLoaderTests.java @@ -14,7 +14,7 @@ import org.elasticsearch.datageneration.datasource.DataSourceResponse; import org.elasticsearch.index.mapper.BlockLoaderTestCase; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.metrics.TDigestExecutionHint; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.analytics.AnalyticsPlugin; @@ -55,7 +55,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques if (ESTestCase.randomBoolean()) { map.put("ignore_malformed", ESTestCase.randomBoolean()); map.put("compression", randomDoubleBetween(1.0, 1000.0, true)); - map.put("digest_type", randomFrom(TDigestState.Type.values())); + map.put("digest_type", randomFrom(TDigestExecutionHint.values())); } return map; }); diff --git 
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java index c72a6e4a84bfa..e84d10e88f461 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestFieldMapperTests.java @@ -16,6 +16,7 @@ import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.aggregations.metrics.TDigestExecutionHint; import org.elasticsearch.search.aggregations.metrics.TDigestState; import org.elasticsearch.tdigest.Centroid; import org.elasticsearch.xcontent.XContentBuilder; @@ -59,7 +60,7 @@ protected void minimalMapping(XContentBuilder b) throws IOException { @Override protected void registerParameters(ParameterChecker checker) throws IOException { checker.registerUpdateCheck(b -> b.field("ignore_malformed", true), m -> assertTrue(m.ignoreMalformed())); - checker.registerConflictCheck("digest_type", b -> b.field("digest_type", TDigestState.Type.AVL_TREE)); + checker.registerConflictCheck("digest_type", b -> b.field("digest_type", TDigestExecutionHint.HIGH_ACCURACY)); checker.registerConflictCheck("compression", b -> b.field("compression", 117)); }