Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
429c14a
Integrate stored fields format bloom filter with synthetic _id
fcofdez Nov 24, 2025
38984b6
Update docs/changelog/138515.yaml
fcofdez Nov 24, 2025
24fc7e8
Wrong version
fcofdez Nov 24, 2025
60b1eb7
Merge branch 'new-codec' of github.com:fcofdez/elasticsearch into new…
fcofdez Nov 24, 2025
7f6d6f3
Use same name for codecs
fcofdez Nov 24, 2025
a72a66a
Get rid of StorageMode
fcofdez Nov 24, 2025
9d0a1f8
Some renaming
fcofdez Nov 24, 2025
799fb3a
Remove setting
fcofdez Nov 25, 2025
089f6f5
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Nov 25, 2025
d0d94ef
Improve use syntheticId check
fcofdez Nov 25, 2025
e01019b
Add comment about read order
fcofdez Nov 25, 2025
e1cbce6
Rename to delegate
fcofdez Nov 25, 2025
680edf1
Initial capacity
fcofdez Nov 25, 2025
3e632b8
Add clarifying comment
fcofdez Nov 25, 2025
279cff3
Add TODO
fcofdez Nov 25, 2025
acc82c1
Add javadoc and rename
fcofdez Nov 25, 2025
cfec7f9
Improve javadoc
fcofdez Nov 25, 2025
92a6daa
Clarify delegates
fcofdez Nov 25, 2025
ad711c3
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Nov 25, 2025
8826d6d
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 1, 2025
18e19f5
Review comments
fcofdez Dec 1, 2025
86908c1
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 2, 2025
e7efa2b
Simplify codecs
fcofdez Dec 2, 2025
c07b92a
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 3, 2025
1ff2ccd
Remove isFilterAvailable
fcofdez Dec 4, 2025
4a92842
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 4, 2025
52dd5e0
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 10, 2025
3afbc55
Fix nits
fcofdez Dec 10, 2025
3f89769
whitespace
fcofdez Dec 10, 2025
810f5f4
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 10, 2025
83cdf7c
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 10, 2025
186b4b0
Merge remote-tracking branch 'origin/main' into new-codec
fcofdez Dec 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/138515.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 138515
summary: Integrate stored fields format bloom filter with synthetic `_id`
area: Codec
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.codec.CodecService;
import org.elasticsearch.index.engine.EngineConfig;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.plugins.Plugin;
Expand Down Expand Up @@ -54,6 +56,7 @@
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;

/**
* Test suite for time series indices that use synthetic ids for documents.
Expand Down Expand Up @@ -101,6 +104,40 @@ public void testInvalidIndexMode() {
);
}

/**
 * Verifies that enabling {@link IndexSettings#USE_SYNTHETIC_ID} is rejected with an
 * {@link IllegalArgumentException} unless {@code index.codec} is left at [default],
 * since the synthetic-id stored-fields/bloom-filter codec is only wired into the
 * default codec.
 */
public void testInvalidCodec() {
    assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
    final var indexName = randomIdentifier();
    internalCluster().startDataOnlyNode();
    // Pick any non-default codec; all of them must be rejected in combination with
    // synthetic ids. Fixed: BEST_COMPRESSION_CODEC was previously listed twice while
    // LEGACY_BEST_COMPRESSION_CODEC was missing, so that codec was never exercised.
    var randomNonDefaultCodec = randomFrom(
        CodecService.BEST_COMPRESSION_CODEC,
        CodecService.LEGACY_DEFAULT_CODEC,
        CodecService.LEGACY_BEST_COMPRESSION_CODEC,
        CodecService.LUCENE_DEFAULT_CODEC
    );

    var exception = expectThrows(
        IllegalArgumentException.class,
        () -> createIndex(
            indexName,
            indexSettings(1, 0).put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES)
                .put("index.routing_path", "hostname")
                .put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true)
                .put(EngineConfig.INDEX_CODEC_SETTING.getKey(), randomNonDefaultCodec)
                .build()
        )
    );
    // The message must match the one built by the USE_SYNTHETIC_ID validator in IndexSettings.
    assertThat(
        exception.getMessage(),
        containsString(
            "The setting ["
                + IndexSettings.USE_SYNTHETIC_ID.getKey()
                + "] is only permitted when [index.codec] is set to [default]. Current mode: ["
                + randomNonDefaultCodec
                + "]."
        )
    );
}

public void testSyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
final var dataStreamName = randomIdentifier();
Expand Down Expand Up @@ -260,12 +297,19 @@ enum Operation {

flush(dataStreamName);

// TODO: Restart the node or relocate the shard randomly

// Check that synthetic _id field have no postings on disk
var indices = new HashSet<>(docs.values());
for (var index : indices) {
var diskUsage = diskUsage(index);
var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
// When _ids are only used to populate the bloom filter,
// IndexDiskUsageStats won't account for anything, since
// the bloom filter isn't exposed through the Reader API and
// the analyzer expects to get documents with fields to do the
// disk usage accounting.
assertThat(diskUsageIdField, nullValue());
}
}

Expand Down Expand Up @@ -376,7 +420,12 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
for (var index : indices) {
var diskUsage = diskUsage(index);
var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
// When _ids are only used to populate the bloom filter,
// IndexDiskUsageStats won't account for anything, since
// the bloom filter isn't exposed through the Reader API and
// the analyzer expects to get documents with fields to do the
// disk usage accounting.
assertThat(diskUsageIdField, nullValue());
}

assertHitCount(client().prepareSearch(dataStreamName).setSize(0), 10L);
Expand Down
4 changes: 3 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@
exports org.elasticsearch.index.codec;
exports org.elasticsearch.index.codec.tsdb;
exports org.elasticsearch.index.codec.bloomfilter;
exports org.elasticsearch.index.codec.storedfields;
exports org.elasticsearch.index.codec.zstd;
exports org.elasticsearch.index.engine;
exports org.elasticsearch.index.fielddata;
Expand Down Expand Up @@ -481,7 +482,8 @@
org.elasticsearch.index.codec.Elasticsearch816Codec,
org.elasticsearch.index.codec.Elasticsearch900Codec,
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec,
org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec,
org.elasticsearch.index.codec.ES93TSDBDefaultCompressionLucene103Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
18 changes: 17 additions & 1 deletion server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Booleans;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.codec.CodecService;
import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
Expand All @@ -49,6 +50,7 @@

import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_INDEX_VERSION_CREATED;
import static org.elasticsearch.cluster.routing.allocation.ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_SETTING;
import static org.elasticsearch.index.engine.EngineConfig.INDEX_CODEC_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING;
Expand Down Expand Up @@ -715,12 +717,26 @@ public void validate(Boolean enabled, Map<Setting<?>, Object> settings) {
)
);
}

var codecName = (String) settings.get(INDEX_CODEC_SETTING);
if (codecName.equals(CodecService.DEFAULT_CODEC) == false) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"The setting [%s] is only permitted when [%s] is set to [%s]. Current mode: [%s].",
USE_SYNTHETIC_ID.getKey(),
INDEX_CODEC_SETTING.getKey(),
CodecService.DEFAULT_CODEC,
codecName
)
);
}
}
}

@Override
public Iterator<Setting<?>> settings() {
List<Setting<?>> list = List.of(MODE);
List<Setting<?>> list = List.of(MODE, INDEX_CODEC_SETTING);
return list.iterator();
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion SKIPPER_DEFAULTS_ONLY_ON_TSDB = def(9_052_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion DISK_BBQ_LICENSE_ENFORCEMENT = def(9_053_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion STORE_IGNORED_KEYWORDS_IN_BINARY_DOC_VALUES = def(9_054_0_00, Version.LUCENE_10_3_2);
public static final IndexVersion TIME_SERIES_USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID = def(9_055_0_00, Version.LUCENE_10_3_2);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
import org.elasticsearch.index.mapper.MapperService;

Expand Down Expand Up @@ -48,8 +47,17 @@ public class CodecService implements CodecProvider {
public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
final var codecs = new HashMap<String, Codec>();

Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
boolean useSyntheticId = mapperService != null
&& mapperService.getIndexSettings().useTimeSeriesSyntheticId()
&& mapperService.getIndexSettings()
.getIndexVersionCreated()
.onOrAfter(IndexVersions.TIME_SERIES_USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID);

var legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe just having one useSyntheticId if statement with an else clause is clearer than having useSyntheticId checks in several places?

if (useSyntheticId) {
// Use the default Lucene compression when the synthetic id is used even if the ZSTD feature flag is enabled
codecs.put(DEFAULT_CODEC, new ES93TSDBDefaultCompressionLucene103Codec(legacyBestSpeedCodec, bigArrays));
} else if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should remove this feature flag. I don't think we will use zstd for the default codec in the near-term future.
But I don't think this affects this PR.

codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
} else {
codecs.put(DEFAULT_CODEC, legacyBestSpeedCodec);
Expand All @@ -67,8 +75,6 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
for (String codec : Codec.availableCodecs()) {
codecs.put(codec, Codec.forName(codec));
}
final boolean useTsdbSyntheticId = mapperService != null && mapperService.getIndexSettings().useTimeSeriesSyntheticId();
assert useTsdbSyntheticId == false || mapperService.getIndexSettings().getMode() == IndexMode.TIME_SERIES;

this.codecs = codecs.entrySet().stream().collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, e -> {
Codec codec;
Expand All @@ -77,9 +83,6 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
} else {
codec = new DeduplicateFieldInfosCodec(e.getValue().getName(), e.getValue());
}
if (useTsdbSyntheticId && codec instanceof TSDBSyntheticIdCodec == false) {
codec = new TSDBSyntheticIdCodec(codec.getName(), codec);
}
return codec;
}));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.elasticsearch.common.util.BigArrays;

/**
 * Codec for time-series indices that use synthetic {@code _id}s, combining the synthetic-id
 * handling and bloom-filter-backed stored fields format of {@code TSDBCodecWithSyntheticId}
 * with the default Lucene compression by delegating to a plain {@link Lucene103Codec}.
 */
public class ES93TSDBDefaultCompressionLucene103Codec extends TSDBCodecWithSyntheticId {
    /** Public no-arg constructor, needed for SPI loading at read-time. */
    // NOTE(review): bigArrays is null on the SPI read path — presumably the bloom filter only
    // needs BigArrays at write time; confirm TSDBCodecWithSyntheticId tolerates null here.
    public ES93TSDBDefaultCompressionLucene103Codec() {
        this(new Lucene103Codec(), null);
    }

    /**
     * @param delegate  the Lucene codec providing all formats not overridden by the parent class
     * @param bigArrays arrays used to back the bloom filter when writing stored fields
     */
    ES93TSDBDefaultCompressionLucene103Codec(Lucene103Codec delegate, BigArrays bigArrays) {
        // The codec name is registered via SPI and recorded in segment metadata; it must stay stable.
        super("ES93TSDBDefaultCompressionLucene103Codec", delegate, bigArrays);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.codec.bloomfilter.ES93BloomFilterStoredFieldsFormat;
import org.elasticsearch.index.codec.storedfields.TSDBStoredFieldsFormat;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;
import org.elasticsearch.index.mapper.IdFieldMapper;

/**
* Abstract base class for ES codecs used with time-series ({@code TIME_SERIES}) indices
* that employ synthetic document IDs for storage optimization.
*
* <p>This class configures the codec to use the following formats:
* <ul>
* <li>
* Use {@link TSDBSyntheticIdCodec} as the underlying codec for synthesizing the `_id` field from
* the values of other fields of the document (ex: _tsid, @timestamp, etc.) so that no inverted index
* or stored fields are required for the `_id`. As such, looking up documents by `_id` might be very
* slow and that's why it is used along with a Bloom filter.
* </li>
* <li>
* Apply {@link TSDBStoredFieldsFormat} with bloom filter optimization for efficient ID lookups
* </li>
* </ul>
*
* <p>Synthetic IDs in TSDB indices are generated from the document's dimensions and timestamp,
* replacing the standard {@code _id} field to reduce storage overhead.
*
* @see TSDBSyntheticIdCodec
* @see TSDBStoredFieldsFormat
*/
abstract class TSDBCodecWithSyntheticId extends FilterCodec {
private final TSDBStoredFieldsFormat storedFieldsFormat;

TSDBCodecWithSyntheticId(String name, Codec delegate, BigArrays bigArrays) {
super(name, new TSDBSyntheticIdCodec(delegate));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can merge TSDBSyntheticIdCodec and TSDBCodecWithSyntheticId together in a follow up.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I just saw your #138515 (comment) 👍

Copy link
Contributor Author

@fcofdez fcofdez Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm planning to incorporate the code from TSDBSyntheticIdCodec into this class in a follow-up PR. But I wanted to keep the change size under control.

this.storedFieldsFormat = new TSDBStoredFieldsFormat(
delegate.storedFieldsFormat(),
new ES93BloomFilterStoredFieldsFormat(
bigArrays,
ES93BloomFilterStoredFieldsFormat.DEFAULT_BLOOM_FILTER_SIZE,
IdFieldMapper.NAME
)
);
}

@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.bloomfilter;

import org.apache.lucene.util.BytesRef;

import java.io.Closeable;
import java.io.IOException;

public interface BloomFilter extends Closeable {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mainly a style thing, but I think this would be nicer as a function interface with a no-op static implementation. Something like:

static BloomFilter NO_FILTER = (field, term) -> true;

And then rather than checking for isFilterAvailable() you check for == BloomFilter.NO_FILTER.

Also I don't think it needs to be Closeable any more?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for looking into this @romseygeek, I've removed the isFilterAvailable method as it is indeed redundant (see 1ff2ccd). Regarding the Closeable, we need it for DelegatingBloomFilterFieldsProducer to close the underlying bloom filter file once it's done with it.

BloomFilter NO_FILTER = new BloomFilter() {
@Override
public void close() throws IOException {

}

@Override
public boolean mayContainTerm(String field, BytesRef term) throws IOException {
return true;
}
};

/**
* Tests whether the given term may exist in the specified field.
*
* @param field the field name to check
* @param term the term to test for membership
* @return true if term may be present, false if definitely absent
*/
boolean mayContainTerm(String field, BytesRef term) throws IOException;
}
Loading