diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java index c97589c5ab93d..9d98267009832 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBSyntheticIdsIT.java @@ -1221,6 +1221,9 @@ private static void putDataStreamTemplate(String indexPattern, int primaries, in } else if (rarely()) { settings.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SourceFieldMapper.Mode.STORED); } + if (rarely()) { + settings.put(IndexSettings.USE_DOC_VALUES_SKIPPER.getKey(), false); + } settings.put(extraSettings); final var mappings = """ diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdDocValuesHolder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdDocValuesHolder.java index e1e289b94efdc..ce3ea494b8960 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdDocValuesHolder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdDocValuesHolder.java @@ -10,6 +10,7 @@ package org.elasticsearch.index.codec.tsdb; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -17,6 +18,7 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper; import org.elasticsearch.index.mapper.Uid; @@ -39,6 
+41,8 @@ class TSDBSyntheticIdDocValuesHolder { private final FieldInfo timestampFieldInfo; private final FieldInfo routingHashFieldInfo; private final DocValuesProducer docValuesProducer; + private final boolean hasTsIdSkipper; + private final boolean hasTimestampSkipper; private SortedNumericDocValues timestampDocValues; // sorted desc. order private SortedDocValues routingHashDocValues; // sorted asc. order @@ -52,6 +56,8 @@ class TSDBSyntheticIdDocValuesHolder { this.timestampFieldInfo = safeFieldInfo(fieldInfos, TSDBSyntheticIdPostingsFormat.TIMESTAMP); this.routingHashFieldInfo = safeFieldInfo(fieldInfos, TSDBSyntheticIdPostingsFormat.TS_ROUTING_HASH); this.docValuesProducer = docValuesProducer; + this.hasTsIdSkipper = tsIdFieldInfo.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE; + this.hasTimestampSkipper = timestampFieldInfo.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE; } private FieldInfo safeFieldInfo(FieldInfos fieldInfos, String fieldName) { @@ -178,6 +184,9 @@ BytesRef lookupTsIdOrd(int tsIdOrdinal) throws IOException { * @throws IOException if any I/O exception occurs */ private int findStartDocIDForTsIdOrd(int tsIdOrd) throws IOException { + if (hasTsIdSkipper == false) { + return 0; + } var skipper = docValuesProducer.getSkipper(tsIdFieldInfo); assert skipper != null; if (skipper.minValue() > tsIdOrd || tsIdOrd > skipper.maxValue()) { @@ -265,7 +274,11 @@ int findFirstDocWithTsIdOrdinalEqualTo(int tsIdOrd) throws IOException { return DocIdSetIterator.NO_MORE_DOCS; } + @Nullable DocValuesSkipper docValuesSkipperForTimestamp() throws IOException { + if (hasTimestampSkipper == false) { + return null; + } var skipper = docValuesProducer.getSkipper(timestampFieldInfo); assert skipper != null; return skipper; diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java index 
5cd8771f5ee8f..c4ea2553283b3 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdFieldsProducer.java @@ -232,40 +232,44 @@ public SeekStatus seekCeil(BytesRef id) throws IOException { final long timestamp = TsidExtractingIdFieldMapper.extractTimestampFromSyntheticId(id); // Use doc values skipper on timestamp to early exit or skip to the first document matching the timestamp + int nextDocID; var skipper = docValues.docValuesSkipperForTimestamp(); - if (timestamp > skipper.maxValue()) { - // timestamp is greater than the global maximum value in the segment, so the first docID matching the _tsid is guaranteed to - // have a smaller timestamp that the one we're looking for and we can early exit at the current docID position. Note that - // synthetic ids are generated so that the resulting array of bytes has a natural order that reflects the order of docs in - // the segment (_tsid asc then @timestamp desc). So if timestamp > skipper.maxValue(), it means that the next doc has a - // @timestamp smaller than what we're looking for. - docID = firstDocID; - docTsIdOrd = tsIdOrd; - docTimestamp = null; - return SeekStatus.NOT_FOUND; - } - if (skipper.minValue() > timestamp) { - // timestamp is smaller than the global minimum value in the segment, so no docs matching the _tsid will also match the - // timestamp, so we can early exit at the position of the next _tsid (if there is such one). 
- int nextDocTsIdOrd = tsIdOrd + 1; - if (nextDocTsIdOrd < docValues.getTsIdValueCount()) { - docID = docValues.findFirstDocWithTsIdOrdinalEqualTo(nextDocTsIdOrd); - docTsIdOrd = nextDocTsIdOrd; + if (skipper != null) { + if (timestamp > skipper.maxValue()) { + // timestamp is greater than the global maximum value in the segment, so the first docID matching the _tsid is + // guaranteed to have a smaller timestamp than the one we're looking for and we can early exit at the current docID + // position. Note that synthetic ids are generated so that the resulting array of bytes has a natural order that + // reflects the order of docs in the segment (_tsid asc then @timestamp desc). So if timestamp > skipper.maxValue(), + // it means that the next doc has a @timestamp smaller than what we're looking for. + docID = firstDocID; + docTsIdOrd = tsIdOrd; docTimestamp = null; return SeekStatus.NOT_FOUND; } - // no docs/terms to iterate on - resetDocID(DocIdSetIterator.NO_MORE_DOCS); - return SeekStatus.END; - } - skipper.advance(firstDocID); - skipper.advance(timestamp, Long.MAX_VALUE); - - int nextDocID; - if (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { - nextDocID = Math.max(firstDocID, skipper.minDocID(0)); + if (skipper.minValue() > timestamp) { + // timestamp is smaller than the global minimum value in the segment, so no docs matching the _tsid will also match + // the timestamp, so we can early exit at the position of the next _tsid (if there is one). 
+ int nextDocTsIdOrd = tsIdOrd + 1; + if (nextDocTsIdOrd < docValues.getTsIdValueCount()) { + docID = docValues.findFirstDocWithTsIdOrdinalEqualTo(nextDocTsIdOrd); + docTsIdOrd = nextDocTsIdOrd; + docTimestamp = null; + return SeekStatus.NOT_FOUND; + } + // no docs/terms to iterate on + resetDocID(DocIdSetIterator.NO_MORE_DOCS); + return SeekStatus.END; + } + skipper.advance(firstDocID); + skipper.advance(timestamp, Long.MAX_VALUE); + + if (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + nextDocID = Math.max(firstDocID, skipper.minDocID(0)); + } else { + // we exhausted the doc values skipper, scan all docs from first doc matching _tsid + nextDocID = firstDocID; + } } else { - // we exhausted the doc values skipper, scan all docs from first doc matching _tsid nextDocID = firstDocID; } int nextDocTsIdOrd = tsIdOrd; diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormatTests.java index b95cc8648cf0b..bba3b4591e143 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdPostingsFormatTests.java @@ -503,41 +503,36 @@ private static void runTest(CheckedBiConsumer