diff --git a/docs/changelog/145458.yaml b/docs/changelog/145458.yaml new file mode 100644 index 0000000000000..0e303cc17c57b --- /dev/null +++ b/docs/changelog/145458.yaml @@ -0,0 +1,5 @@ +area: Downsampling +issues: [] +pr: 145458 +summary: Support multi-value dimensions in downsampling (bug fix) +type: bug diff --git a/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamFeatures.java b/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamFeatures.java index 38f03557ca9c7..e2e7ef959904a 100644 --- a/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamFeatures.java +++ b/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamFeatures.java @@ -26,6 +26,10 @@ public class DataStreamFeatures implements FeatureSpecification { "data_stream.downsample.default_aggregate_metric_fix" ); + public static final NodeFeature DOWNSAMPLE_MULTI_VALUE_DIMENSIONS = new NodeFeature( + "data_stream.downsample.fix_support_multi_value_dimensions" + ); + public static final NodeFeature LOGS_STREAM_FEATURE = new NodeFeature("logs_stream"); public static final NodeFeature FAILURE_STORE_IN_LOG_DATA_STREAMS = new NodeFeature("logs_data_streams.failure_store.enabled"); @@ -41,7 +45,8 @@ public Set getTestFeatures() { DATA_STREAM_FAILURE_STORE_TSDB_FIX, DOWNSAMPLE_AGGREGATE_DEFAULT_METRIC_FIX, LOGS_STREAM_FEATURE, - FAILURE_STORE_IN_LOG_DATA_STREAMS + FAILURE_STORE_IN_LOG_DATA_STREAMS, + DOWNSAMPLE_MULTI_VALUE_DIMENSIONS ); } } diff --git a/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml b/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml index 74e572708cbab..dd33abc36311e 100644 --- a/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml +++ b/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml @@ -1552,7 +1552,6 @@ setup: - match: { hits.hits.0._source.k8s\.pod\.label: "xyz" } - match: { hits.hits.0._source.k8s\.pod\.unmapped: "xyz" } - --- "Downsample label with ignore_above": - requires: @@ -1846,3 +1845,72 @@ setup: - match: { hits.hits.1._source._doc_count: 1 } - match: { hits.hits.1._source.k8s\.pod\.name: cat } - match: { hits.hits.1._source.k8s\.pod\.empty: "" } + +--- +"Downsample index with multi-value dimensions": + - requires: + cluster_features: [ "data_stream.downsample.fix_support_multi_value_dimensions" ] + reason: "Support multi-value dimensions in downsampling (bug fix)" + + - do: + indices.create: + index: test-multi-value + body: + settings: + number_of_shards: 1 + index: + mode: time_series + routing_path: [ metricset ] + time_series: + start_time: 2021-04-28T00:00:00Z + end_time: 2021-04-29T00:00:00Z + mappings: + properties: + "@timestamp": + type: date + metricset: + type: keyword + time_series_dimension: true + multi-counter: + type: long + time_series_metric: counter + + # Insert documents with multiple-value for a dimensions + - do: + bulk: + refresh: true + index: test-multi-value + body: + - '{"index": {}}' + - '{"@timestamp": "2021-04-28T18:50:05.467Z", "metricset": ["pod", "app"], "multi-counter" : [10, 11, 12]}' + - '{"index": {}}' + - '{"@timestamp": "2021-04-28T18:50:27.467Z", "metricset": ["pod", "app"], "multi-counter" : [21, 22, 23]}' + - is_false: errors + + - do: + indices.put_settings: + index: test-multi-value + body: + index.blocks.write: true + + - do: + indices.downsample: + index: test-multi-value + target_index: test-downsample-multi-value + body: > + { + "fixed_interval": "1h", + "sampling_method": "last_value" + } + - is_true: acknowledged + + - do: + search: + index: test-downsample-multi-value + body: + sort: [ "_tsid", "@timestamp" ] + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source._doc_count: 2 } + - match: { hits.hits.0._source.metricset: ["app", "pod"] } + - match: { hits.hits.0._source.multi-counter: 21 } diff --git a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java index f54629f445687..2a0e7cf81e600 100644 --- a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java +++ b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java @@ -41,12 +41,9 @@ boolean validate(FormattedDocValues docValues, IntArrayList buffer) throws IOExc for (int i = 0; i < buffer.size(); i++) { int docId = buffer.get(i); if (docValues.advanceExact(docId)) { - int docValueCount = docValues.docValueCount(); - for (int j = 0; j < docValueCount; j++) { - var value = docValues.nextValue(); - assert value.equals(this.lastValue) != false - : "Dimension value changed without tsid change [" + value + "] != [" + this.lastValue + "]"; - } + var value = retrieveDimensionValues(docValues); + assert value.equals(this.lastValue) != false + : "Dimension value changed without tsid change [" + value + "] != [" + this.lastValue + "]"; } } @@ -65,13 +62,26 @@ public void collect(FormattedDocValues docValues, IntArrayList docIdBuffer) thro if (docValues.advanceExact(docId) == false) { continue; } - int docValueCount = docValues.docValueCount(); - for (int j = 0; j < docValueCount; j++) { - collectOnce(docValues.nextValue()); - } + collectOnce(retrieveDimensionValues(docValues)); // Only need to record one dimension value from one document, within in the same tsid-and-time-interval bucket values are the // same. return; } } + + private Object retrieveDimensionValues(FormattedDocValues docValues) throws IOException { + int docValueCount = docValues.docValueCount(); + assert docValueCount > 0; + Object value; + if (docValueCount == 1) { + value = docValues.nextValue(); + } else { + var values = new Object[docValueCount]; + for (int j = 0; j < docValueCount; j++) { + values[j] = docValues.nextValue(); + } + value = values; + } + return value; + } } diff --git a/x-pack/plugin/downsample/src/test/java/org/elasticsearch/xpack/downsample/DimensionFieldProducerTests.java b/x-pack/plugin/downsample/src/test/java/org/elasticsearch/xpack/downsample/DimensionFieldProducerTests.java new file mode 100644 index 0000000000000..e108cfd1fa157 --- /dev/null +++ b/x-pack/plugin/downsample/src/test/java/org/elasticsearch/xpack/downsample/DimensionFieldProducerTests.java @@ -0,0 +1,125 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.downsample; + +import org.apache.lucene.internal.hppc.IntArrayList; +import org.apache.lucene.internal.hppc.IntObjectHashMap; +import org.elasticsearch.index.fielddata.FormattedDocValues; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; + +import static org.hamcrest.Matchers.arrayContainingInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.nullValue; + +public class DimensionFieldProducerTests extends ESTestCase { + + public void testKeywordDimension() throws IOException { + DimensionFieldProducer dimensionProducer = new DimensionFieldProducer(randomAlphanumericOfLength(10)); + assertThat(dimensionProducer.lastValue(), nullValue()); + var docIdBuffer = IntArrayList.from(0, 1, 2); + var values = createValuesInstance(docIdBuffer, new String[] { "aaa", "aaa", "aaa" }); + dimensionProducer.collect(values, docIdBuffer); + assertThat(dimensionProducer.lastValue(), equalTo("aaa")); + dimensionProducer.reset(); + assertThat(dimensionProducer.lastValue(), nullValue()); + } + + public void testDoubleDimension() throws IOException { + DimensionFieldProducer dimensionProducer = new DimensionFieldProducer(randomAlphanumericOfLength(10)); + assertThat(dimensionProducer.lastValue(), nullValue()); + var docIdBuffer = IntArrayList.from(0, 1, 2); + var values = createValuesInstance(docIdBuffer, new Double[] { 10.20D, 10.20D, 10.20D }); + dimensionProducer.collect(values, docIdBuffer); + assertThat(dimensionProducer.lastValue(), equalTo(10.20D)); + dimensionProducer.reset(); + assertThat(dimensionProducer.lastValue(), nullValue()); + } + + public void testIntegerDimension() throws IOException { + DimensionFieldProducer dimensionProducer = new DimensionFieldProducer(randomAlphanumericOfLength(10)); + assertThat(dimensionProducer.lastValue(), nullValue()); + var docIdBuffer = IntArrayList.from(0, 1, 2); + var values = createValuesInstance(docIdBuffer, new Integer[] { 10, 10, 10 }); + dimensionProducer.collect(values, docIdBuffer); + assertThat(dimensionProducer.lastValue(), equalTo(10)); + dimensionProducer.reset(); + assertThat(dimensionProducer.lastValue(), nullValue()); + } + + public void testBooleanDimension() throws IOException { + DimensionFieldProducer dimensionProducer = new DimensionFieldProducer(randomAlphanumericOfLength(10)); + assertThat(dimensionProducer.lastValue(), nullValue()); + var docIdBuffer = IntArrayList.from(0, 1, 2); + var values = createValuesInstance(docIdBuffer, new Boolean[] { true, true, true }); + dimensionProducer.collect(values, docIdBuffer); + assertThat(dimensionProducer.lastValue(), equalTo(true)); + dimensionProducer.reset(); + assertThat(dimensionProducer.lastValue(), nullValue()); + } + + public void testMultiValueDimensions() throws IOException { + var docIdBuffer = IntArrayList.from(0); + Boolean[] multiValue = new Boolean[] { true, false }; + var values = new FormattedDocValues() { + + Iterator iterator = Arrays.stream(multiValue).iterator(); + + @Override + public boolean advanceExact(int docId) { + return true; + } + + @Override + public int docValueCount() { + return 2; + } + + @Override + public Object nextValue() { + return iterator.next(); + } + }; + + values.iterator = Arrays.stream(multiValue).iterator(); + DimensionFieldProducer multiLastValueProducer = new DimensionFieldProducer(randomAlphanumericOfLength(10)); + assertThat(multiLastValueProducer.lastValue(), nullValue()); + multiLastValueProducer.collect(values, docIdBuffer); + assertThat(multiLastValueProducer.lastValue(), instanceOf(Object[].class)); + assertThat((Object[]) multiLastValueProducer.lastValue(), arrayContainingInAnyOrder(true, false)); + } + + static FormattedDocValues createValuesInstance(IntArrayList docIdBuffer, T[] values) { + return new FormattedDocValues() { + + final IntObjectHashMap docIdToValue = IntObjectHashMap.from(docIdBuffer.toArray(), values); + + int currentDocId = -1; + + @Override + public boolean advanceExact(int target) throws IOException { + currentDocId = target; + return docIdToValue.containsKey(target); + } + + @Override + public T nextValue() throws IOException { + return docIdToValue.get(currentDocId); + } + + @Override + public int docValueCount() { + return 1; + } + }; + } +}