Skip to content
Closed
5 changes: 5 additions & 0 deletions docs/changelog/138299.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 138299
summary: Add `PerFieldStoredFieldsFormat` to allow multiple stored field formats
area: Codec
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/138357.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 138357
summary: Integrate bloom filter checks with TSDBSyntheticIdPostingsFormat
area: Codec
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;

/**
* Test suite for time series indices that use synthetic ids for documents.
Expand Down Expand Up @@ -103,8 +104,10 @@ public void testInvalidIndexMode() {

public void testSyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
assumeTrue("Test should only run with feature flag", IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG);
final var dataStreamName = randomIdentifier();
putDataStreamTemplate(dataStreamName, randomIntBetween(1, 5));
final var enableStoredFieldsBloomFilter = randomBoolean();
putDataStreamTemplate(dataStreamName, randomIntBetween(1, 5), enableStoredFieldsBloomFilter);

final var docs = new HashMap<String, String>();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
Expand Down Expand Up @@ -265,14 +268,21 @@ enum Operation {
for (var index : indices) {
var diskUsage = diskUsage(index);
var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
// If the _id stored fields bloom filter is enabled, IndexDiskUsageStats won't account for anything since
// the bloom filter is not exposed through the Reader API.
if (enableStoredFieldsBloomFilter) {
assertThat(diskUsageIdField, nullValue());
} else {
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
}
}
}

public void testGetFromTranslogBySyntheticId() throws Exception {
assumeTrue("Test should only run with feature flag", IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG);
final var dataStreamName = randomIdentifier();
putDataStreamTemplate(dataStreamName, 1);
final var enableStoredFieldsBloomFilter = randomBoolean();
putDataStreamTemplate(dataStreamName, 1, enableStoredFieldsBloomFilter);

final var docs = new HashMap<String, String>();
final var unit = randomFrom(ChronoUnit.SECONDS, ChronoUnit.MINUTES);
Expand Down Expand Up @@ -376,7 +386,13 @@ public void testGetFromTranslogBySyntheticId() throws Exception {
for (var index : indices) {
var diskUsage = diskUsage(index);
var diskUsageIdField = AnalyzeIndexDiskUsageTestUtils.getPerFieldDiskUsage(diskUsage, IdFieldMapper.NAME);
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
// If the _id stored fields bloom filter is enabled, IndexDiskUsageStats won't account for anything since
// the bloom filter is not exposed through the Reader API.
if (enableStoredFieldsBloomFilter) {
assertThat(diskUsageIdField, nullValue());
} else {
assertThat("_id field should not have postings on disk", diskUsageIdField.getInvertedIndexBytes(), equalTo(0L));
}
}

assertHitCount(client().prepareSearch(dataStreamName).setSize(0), 10L);
Expand Down Expand Up @@ -413,11 +429,12 @@ private static BulkItemResponse[] createDocuments(String indexName, XContentBuil
return bulkResponse.getItems();
}

private static void putDataStreamTemplate(String indexPattern, int shards) throws IOException {
private static void putDataStreamTemplate(String indexPattern, int shards, boolean enableStoredFieldsBloomFilter) throws IOException {
final var settings = indexSettings(shards, 0).put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.getName())
.put(IndexSettings.BLOOM_FILTER_ID_FIELD_ENABLED_SETTING.getKey(), false)
.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), -1)
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true);
.put(IndexSettings.USE_SYNTHETIC_ID.getKey(), true)
.put(IndexSettings.USE_STORED_FIELD_BLOOM_FILTER_ID.getKey(), enableStoredFieldsBloomFilter);

final var mappings = """
{
Expand Down
5 changes: 4 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@
exports org.elasticsearch.index.codec;
exports org.elasticsearch.index.codec.tsdb;
exports org.elasticsearch.index.codec.bloomfilter;
exports org.elasticsearch.index.codec.storedfields;
exports org.elasticsearch.index.codec.zstd;
exports org.elasticsearch.index.engine;
exports org.elasticsearch.index.fielddata;
Expand Down Expand Up @@ -475,7 +476,9 @@
org.elasticsearch.index.codec.Elasticsearch816Codec,
org.elasticsearch.index.codec.Elasticsearch900Codec,
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec,
org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec;
org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec,
org.elasticsearch.index.codec.Elasticsearch93DefaultCompressionLucene103,
org.elasticsearch.index.codec.Elasticsearch93ZstdCompressionLucene103Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
if (IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG) {
settings.add(IndexSettings.USE_SYNTHETIC_ID);
}
if (IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG) {
settings.add(IndexSettings.USE_STORED_FIELD_BLOOM_FILTER_ID);
}
settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING);
BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings);
};
Expand Down
65 changes: 65 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,57 @@ public Iterator<Setting<?>> settings() {
Property.Final
);

/**
 * Whether the {@code stored_field_bloom_filter} feature flag is enabled. The value is read once, when this
 * constant is initialised. It gates the {@code index.mapping.use_stored_field_bloom_filter_id} setting:
 * enabling that setting while this flag is off is rejected by the setting's validator.
 */
public static final boolean USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG = new FeatureFlag("stored_field_bloom_filter")
.isEnabled();
/**
 * Final, index-scoped boolean setting (default {@code false}) that turns on the stored-fields bloom filter
 * for the {@code _id} field.
 *
 * <p>Enabling it is rejected with an {@link IllegalArgumentException} unless both of the following hold:
 * <ul>
 *   <li>the {@link #USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG} feature flag is enabled;</li>
 *   <li>the index {@link #MODE} is {@link IndexMode#TIME_SERIES}.</li>
 * </ul>
 * A disabled value is always accepted.
 */
public static final Setting<Boolean> USE_STORED_FIELD_BLOOM_FILTER_ID = Setting.boolSetting(
    "index.mapping.use_stored_field_bloom_filter_id",
    false,
    new Setting.Validator<>() {
        /** Stand-alone check: an enabled value requires the feature flag. */
        @Override
        public void validate(Boolean enabled) {
            if (enabled && USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG == false) {
                final String message = String.format(
                    Locale.ROOT,
                    "The setting [%s] is only permitted when the feature flag is enabled.",
                    USE_STORED_FIELD_BLOOM_FILTER_ID.getKey()
                );
                throw new IllegalArgumentException(message);
            }
        }

        /** Cross-setting check: an enabled value requires the index mode to be TIME_SERIES. */
        @Override
        public void validate(Boolean enabled, Map<Setting<?>, Object> settings) {
            if (enabled == false) {
                // Nothing to verify when the bloom filter is switched off.
                return;
            }
            final var mode = (IndexMode) settings.get(MODE);
            if (mode == IndexMode.TIME_SERIES) {
                return;
            }
            final String message = String.format(
                Locale.ROOT,
                "The setting [%s] is only permitted when [%s] is set to [%s]. Current mode: [%s].",
                USE_STORED_FIELD_BLOOM_FILTER_ID.getKey(),
                MODE.getKey(),
                IndexMode.TIME_SERIES.name(),
                mode.name()
            );
            throw new IllegalArgumentException(message);
        }

        /** Declares the settings whose values must be passed to the cross-setting check above. */
        @Override
        public Iterator<Setting<?>> settings() {
            return List.<Setting<?>>of(MODE).iterator();
        }
    },
    Property.IndexScope,
    Property.Final
);

/**
* The {@link IndexMode "mode"} of the index.
*/
Expand Down Expand Up @@ -1020,6 +1071,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private final boolean useTimeSeriesSyntheticId;
private final boolean useTimeSeriesDocValuesFormat;
private final boolean useEs812PostingsFormat;
private final boolean useStoredFieldsBloomFilterForId;

/**
* The maximum number of refresh listeners allows on this shard.
Expand Down Expand Up @@ -1230,6 +1282,12 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
} else {
useTimeSeriesSyntheticId = false;
}
useStoredFieldsBloomFilterForId = IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG
&& scopedSettings.get(USE_STORED_FIELD_BLOOM_FILTER_ID);
if (useStoredFieldsBloomFilterForId) {
assert indexMetadata.getIndexMode() == IndexMode.TIME_SERIES : indexMetadata.getIndexMode();
assert indexMetadata.getCreationVersion().onOrAfter(IndexVersions.TIME_SERIES_USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID);
}
if (recoverySourceSyntheticEnabled) {
if (DiscoveryNode.isStateless(settings)) {
throw new IllegalArgumentException("synthetic recovery source is only allowed in stateful");
Expand Down Expand Up @@ -1969,6 +2027,13 @@ public boolean useTimeSeriesSyntheticId() {
return useTimeSeriesSyntheticId;
}

/**
 * Reports the value computed when this {@code IndexSettings} instance was constructed.
 *
 * @return {@code true} when the stored-fields bloom filter for the {@code _id} field is in use for this
 *         index, i.e. the feature flag is enabled and the corresponding index setting is set.
 */
public boolean useStoredFieldsBloomFilterForId() {
    return this.useStoredFieldsBloomFilterForId;
}

/**
* @return Whether the time series doc value format should be used.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID = def(9_044_0_00, Version.LUCENE_10_3_1);
public static final IndexVersion TIME_SERIES_DIMENSIONS_USE_SKIPPERS = def(9_045_0_00, Version.LUCENE_10_3_1);
public static final IndexVersion TIME_SERIES_ALL_FIELDS_USE_SKIPPERS = def(9_046_0_00, Version.LUCENE_10_3_1);
public static final IndexVersion TIME_SERIES_USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID = def(9_047_0_00, Version.LUCENE_10_3_1);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
import org.elasticsearch.index.mapper.MapperService;
Expand Down Expand Up @@ -49,7 +50,19 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
final var codecs = new HashMap<String, Codec>();

Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
if (IndexSettings.USE_STORED_FIELDS_BLOOM_FILTER_FOR_ID_FEATURE_FLAG) {
if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
codecs.put(
DEFAULT_CODEC,
new PerFieldMapperCodecZstdCompression(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays)
);
} else {
codecs.put(
DEFAULT_CODEC,
new PerFieldMapperCodecDefaultCompression(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays)
);
}
} else if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
} else {
codecs.put(DEFAULT_CODEC, legacyBestSpeedCodec);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.storedfields.ESLucene90StoredFieldsFormat;
import org.elasticsearch.index.codec.storedfields.ESStoredFieldsFormat;
import org.elasticsearch.index.codec.storedfields.PerFieldStoredFieldsFormat;

/**
 * A {@link FilterCodec} over {@link Lucene103Codec} that resolves the postings, doc-values, KNN-vectors and
 * stored-fields formats per field. Each per-field wrapper asks the matching {@code get*FormatForField}
 * hook on this codec, so subclasses can pick a format for individual fields by overriding those hooks.
 *
 * <p>Stored fields use an {@link ESLucene90StoredFieldsFormat} built with the supplied
 * {@link Lucene103Codec.Mode} by default.
 *
 * <p>NOTE(review): the codec is registered under the name {@code "Elasticsearch93Lucene103"}, which is not
 * the class name. Confirm that this name is unique among the registered codecs.
 */
public class Elasticsearch93DefaultCompressionLucene103 extends FilterCodec {

    // Fixed fallbacks returned by the per-field hooks when they are not overridden.
    private final PostingsFormat fallbackPostings = new Lucene103PostingsFormat();
    private final DocValuesFormat fallbackDocValues = new Lucene90DocValuesFormat();
    private final KnnVectorsFormat fallbackKnnVectors = new Lucene99HnswVectorsFormat();

    /** Stored-fields fallback; depends on the compression mode passed to the constructor. */
    protected final ESStoredFieldsFormat defaultStoredFieldsFormat;

    // Per-field wrappers: each one routes the lookup back to the overridable hook on the enclosing codec.
    private final PostingsFormat perFieldPostings = new PerFieldPostingsFormat() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return Elasticsearch93DefaultCompressionLucene103.this.getPostingsFormatForField(field);
        }
    };

    private final DocValuesFormat perFieldDocValues = new PerFieldDocValuesFormat() {
        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            return Elasticsearch93DefaultCompressionLucene103.this.getDocValuesFormatForField(field);
        }
    };

    private final KnnVectorsFormat perFieldKnnVectors = new PerFieldKnnVectorsFormat() {
        @Override
        public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
            return Elasticsearch93DefaultCompressionLucene103.this.getKnnVectorsFormatForField(field);
        }
    };

    private final StoredFieldsFormat perFieldStoredFields = new PerFieldStoredFieldsFormat() {
        @Override
        protected ESStoredFieldsFormat getStoredFieldsFormatForField(String field) {
            return Elasticsearch93DefaultCompressionLucene103.this.getStoredFieldsFormatForField(field);
        }
    };

    /** Public no-arg constructor, needed for SPI loading at read-time; uses {@code BEST_SPEED}. */
    public Elasticsearch93DefaultCompressionLucene103() {
        this(Lucene103Codec.Mode.BEST_SPEED);
    }

    /**
     * Creates the codec with the given stored-fields compression mode.
     *
     * @param mode mode handed to the default {@link ESLucene90StoredFieldsFormat}
     */
    public Elasticsearch93DefaultCompressionLucene103(Lucene103Codec.Mode mode) {
        super("Elasticsearch93Lucene103", new Lucene103Codec());
        this.defaultStoredFieldsFormat = new ESLucene90StoredFieldsFormat(mode);
    }

    /** @return the per-field stored-fields format wrapper */
    @Override
    public StoredFieldsFormat storedFieldsFormat() {
        return perFieldStoredFields;
    }

    /** @return the per-field postings format wrapper */
    @Override
    public final PostingsFormat postingsFormat() {
        return perFieldPostings;
    }

    /** @return the per-field doc-values format wrapper */
    @Override
    public final DocValuesFormat docValuesFormat() {
        return perFieldDocValues;
    }

    /** @return the per-field KNN-vectors format wrapper */
    @Override
    public final KnnVectorsFormat knnVectorsFormat() {
        return perFieldKnnVectors;
    }

    /**
     * Returns the postings format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns this codec's {@link Lucene103PostingsFormat} instance.
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public PostingsFormat getPostingsFormatForField(String field) {
        return fallbackPostings;
    }

    /**
     * Returns the doc-values format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns this codec's {@link Lucene90DocValuesFormat} instance.
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public DocValuesFormat getDocValuesFormatForField(String field) {
        return fallbackDocValues;
    }

    /**
     * Returns the vectors format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns this codec's {@link Lucene99HnswVectorsFormat} instance.
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
        return fallbackKnnVectors;
    }

    /**
     * Returns the stored-fields format that should be used for <code>field</code>; by default this is
     * {@link #defaultStoredFieldsFormat}.
     *
     * <p>The return type is the ES-specific {@link ESStoredFieldsFormat} rather than the plain Lucene
     * type. Per the original author's note, this is required for SPI loading.
     */
    public ESStoredFieldsFormat getStoredFieldsFormatForField(String field) {
        return defaultStoredFieldsFormat;
    }
}
Loading