Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
if (IndexSettings.DISABLE_SEQUENCE_NUMBERS_FEATURE_FLAG) {
settings.add(IndexSettings.DISABLE_SEQUENCE_NUMBERS);
}
if (IndexSettings.ALLOW_LARGE_BINARY_BLOCK_SIZE.isEnabled()) {
settings.add(IndexSettings.USE_TIME_SERIES_DOC_VALUES_FORMAT_LARGE_BINARY_BLOCK_SIZE);
}
settings.add(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING);
BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(settings);
};
Expand Down
29 changes: 25 additions & 4 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,14 @@ public Iterator<Setting<?>> settings() {
Property.Final
);

/**
 * Feature flag gating the large binary block size option of the time series doc values format.
 * While the flag is disabled, {@link #USE_TIME_SERIES_DOC_VALUES_FORMAT_LARGE_BINARY_BLOCK_SIZE} is not added to the
 * built-in index settings and {@code IndexSettings} resolves the option to {@code false} without reading the setting.
 */
public static final FeatureFlag ALLOW_LARGE_BINARY_BLOCK_SIZE = new FeatureFlag("allow_large_binary_block_size");
/**
 * Boolean index setting (default {@code false}, declared with {@code Property.IndexScope} and {@code Property.Final})
 * that requests the large binary block size for the time series doc values format.
 * It is only consulted when {@link #ALLOW_LARGE_BINARY_BLOCK_SIZE} is enabled.
 */
public static final Setting<Boolean> USE_TIME_SERIES_DOC_VALUES_FORMAT_LARGE_BINARY_BLOCK_SIZE = Setting.boolSetting(
"index.use_time_series_doc_values_format_large_binary_block_size",
false,
Property.IndexScope,
Property.Final
);

/**
* Legacy index setting, kept for 7.x backwards compatibility. This setting has no effect in 8.x. Do not use.
* TODO: Remove in 9.0
Expand Down Expand Up @@ -1148,7 +1156,8 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private final boolean useDocValuesSkipperForHostname;
private final boolean useTimeSeriesSyntheticId;
private final boolean useTimeSeriesDocValuesFormat;
private final boolean useTimeSeriesDocValuesFormatLargeBlockSize;
private final boolean useTimeSeriesDocValuesFormatLargeNumericBlockSize;
private final boolean useTimeSeriesDocValuesFormatLargeBinaryBlockSize;
private final boolean useEs812PostingsFormat;
private final boolean disableSequenceNumbers;

Expand Down Expand Up @@ -1355,7 +1364,9 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
: version.onOrAfter(IndexVersions.SKIPPERS_ENABLED_BY_DEFAULT) && version.before(IndexVersions.SKIPPER_DEFAULTS_ONLY_ON_TSDB);
seqNoIndexOptions = scopedSettings.get(SEQ_NO_INDEX_OPTIONS_SETTING);
useTimeSeriesDocValuesFormat = scopedSettings.get(USE_TIME_SERIES_DOC_VALUES_FORMAT_SETTING);
useTimeSeriesDocValuesFormatLargeBlockSize = scopedSettings.get(USE_TIME_SERIES_DOC_VALUES_FORMAT_LARGE_BLOCK_SIZE);
useTimeSeriesDocValuesFormatLargeNumericBlockSize = scopedSettings.get(USE_TIME_SERIES_DOC_VALUES_FORMAT_LARGE_BLOCK_SIZE);
useTimeSeriesDocValuesFormatLargeBinaryBlockSize = ALLOW_LARGE_BINARY_BLOCK_SIZE.isEnabled()
&& scopedSettings.get(USE_TIME_SERIES_DOC_VALUES_FORMAT_LARGE_BINARY_BLOCK_SIZE);
useEs812PostingsFormat = scopedSettings.get(USE_ES_812_POSTINGS_FORMAT);
intraMergeParallelismEnabled = scopedSettings.get(INTRA_MERGE_PARALLELISM_ENABLED_SETTING);
useTimeSeriesSyntheticId = IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG && scopedSettings.get(SYNTHETIC_ID);
Expand Down Expand Up @@ -2136,8 +2147,18 @@ public boolean useTimeSeriesDocValuesFormat() {
/**
* @return Whether the time series doc value format with large numeric block size should be used.
*/
public boolean isUseTimeSeriesDocValuesFormatLargeBlockSize() {
return useTimeSeriesDocValuesFormatLargeBlockSize;
public boolean isUseTimeSeriesDocValuesFormatLargeNumericBlockSize() {
return useTimeSeriesDocValuesFormatLargeNumericBlockSize;
}

/**
 * Checks if the time series DocValues format is configured to use a large binary block size.
 * <p>
 * The value is resolved once when this {@code IndexSettings} instance is constructed: it is {@code true} only if the
 * {@link #ALLOW_LARGE_BINARY_BLOCK_SIZE} feature flag is enabled and
 * {@link #USE_TIME_SERIES_DOC_VALUES_FORMAT_LARGE_BINARY_BLOCK_SIZE} is set to {@code true} for the index.
 *
 * @return {@code true} if the time series DocValues format is using a large binary block size;
 * {@code false} otherwise, including whenever the feature flag is disabled.
 */
public boolean isUseTimeSeriesDocValuesFormatLargeBinaryBlockSize() {
return useTimeSeriesDocValuesFormatLargeBinaryBlockSize;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,9 @@ public DocValuesFormat getDocValuesFormatForField(String field) {

if (useTSDBDocValuesFormat(field)) {
var indexCreatedVersion = mapperService.getIndexSettings().getIndexVersionCreated();
boolean useLargeBlockSize = mapperService.getIndexSettings().isUseTimeSeriesDocValuesFormatLargeBlockSize();
return TSDBDocValuesFormatFactory.createDocValuesFormat(indexCreatedVersion, useLargeBlockSize);
boolean useLargeNumericBlockSize = mapperService.getIndexSettings().isUseTimeSeriesDocValuesFormatLargeNumericBlockSize();
boolean useLargeBinaryBlockSize = mapperService.getIndexSettings().isUseTimeSeriesDocValuesFormatLargeBinaryBlockSize();
return TSDBDocValuesFormatFactory.createDocValuesFormat(indexCreatedVersion, useLargeNumericBlockSize, useLargeBinaryBlockSize);
}

return docValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@
import java.util.List;

import static org.elasticsearch.index.codec.tsdb.es819.DocValuesConsumerUtil.compatibleWithOptimizedMerge;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.BLOCK_BYTES_THRESHOLD;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.BLOCK_COUNT_THRESHOLD;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_LEVEL_SHIFT;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.SKIP_INDEX_MAX_LEVEL;
Expand All @@ -79,6 +77,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
final BinaryDVCompressionMode binaryDVCompressionMode;
private final boolean enablePerBlockCompression; // only false for testing
private final DocOffsetsCodec.Encoder docOffsetsEncoder;
private final int blockBytesThreshold;
private final int blockCountThreshold;

ES819TSDBDocValuesConsumer(
BinaryDVCompressionMode binaryDVCompressionMode,
Expand All @@ -89,6 +89,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
int minDocsPerOrdinalForOrdinalRangeEncoding,
boolean enableOptimizedMerge,
int numericBlockShift,
int blockBytesThreshold,
int blockCountThreshold,
String dataCodec,
String dataExtension,
String metaCodec,
Expand All @@ -97,6 +99,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer {
this.binaryDVCompressionMode = binaryDVCompressionMode;
this.enablePerBlockCompression = enablePerBlockCompression;
this.docOffsetsEncoder = docOffsetsEncoder;
this.blockBytesThreshold = blockBytesThreshold;
this.blockCountThreshold = blockCountThreshold;
this.state = state;
this.termsDictBuffer = new byte[1 << 14];
this.dir = state.directory;
Expand Down Expand Up @@ -534,7 +538,7 @@ public void close() throws IOException {
private final class CompressedBinaryBlockWriter implements BinaryWriter {
final Compressor compressor;

final int[] docOffsets = new int[BLOCK_COUNT_THRESHOLD + 1]; // start for each doc plus start of doc that would be after last
final int[] docOffsets = new int[blockCountThreshold + 1];

int uncompressedBlockLength = 0;
int maxUncompressedBlockLength = 0;
Expand All @@ -561,7 +565,7 @@ public void addDoc(BytesRef v) throws IOException {
numDocsInCurrentBlock++;
docOffsets[numDocsInCurrentBlock] = uncompressedBlockLength;

if (uncompressedBlockLength >= BLOCK_BYTES_THRESHOLD || numDocsInCurrentBlock >= BLOCK_COUNT_THRESHOLD) {
if (uncompressedBlockLength >= blockBytesThreshold || numDocsInCurrentBlock >= blockCountThreshold) {
flushData();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ public class ES819TSDBDocValuesFormat extends org.apache.lucene.codecs.DocValues
* is 128k, or if the number of values is 1024. These values are a tradeoff between the high compression ratio and decompression
* speed of large blocks, and the ability to avoid decompressing unneeded values provided by small blocks.
*/
public static final int BLOCK_BYTES_THRESHOLD = 128 * 1024;
public static final int BLOCK_COUNT_THRESHOLD = 1024;
public static final int BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT = 128 * 1024;
public static final int BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT = 1024;

// number of documents in an interval
static final int DEFAULT_SKIP_INDEX_INTERVAL_SIZE = 4096;
Expand Down Expand Up @@ -140,6 +140,8 @@ private static boolean getOptimizedMergeEnabledDefault() {
final BinaryDVCompressionMode binaryDVCompressionMode;
final boolean enablePerBlockCompression;
final DocOffsetsCodec docOffsetsCodec;
final int blockBytesThreshold;
final int blockCountThreshold;

public static ES819TSDBDocValuesFormat getInstance(boolean useLargeNumericBlock) {
return useLargeNumericBlock ? new ES819TSDBDocValuesFormat(NUMERIC_LARGE_BLOCK_SHIFT) : new ES819TSDBDocValuesFormat();
Expand Down Expand Up @@ -229,6 +231,32 @@ public ES819TSDBDocValuesFormat(
final boolean enablePerBlockCompression,
final int numericBlockShift,
DocOffsetsCodec docOffsetsCodec
) {
this(
codecName,
skipIndexIntervalSize,
minDocsPerOrdinalForRangeEncoding,
enableOptimizedMerge,
binaryDVCompressionMode,
enablePerBlockCompression,
numericBlockShift,
docOffsetsCodec,
BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT,
BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT
);
}

public ES819TSDBDocValuesFormat(
String codecName,
int skipIndexIntervalSize,
int minDocsPerOrdinalForRangeEncoding,
boolean enableOptimizedMerge,
BinaryDVCompressionMode binaryDVCompressionMode,
final boolean enablePerBlockCompression,
final int numericBlockShift,
DocOffsetsCodec docOffsetsCodec,
int blockBytesThreshold,
int blockCountThreshold
) {
super(codecName);
assert numericBlockShift == NUMERIC_BLOCK_SHIFT || numericBlockShift == NUMERIC_LARGE_BLOCK_SHIFT : numericBlockShift;
Expand All @@ -242,6 +270,8 @@ public ES819TSDBDocValuesFormat(
this.enablePerBlockCompression = enablePerBlockCompression;
this.numericBlockShift = numericBlockShift;
this.docOffsetsCodec = docOffsetsCodec;
this.blockBytesThreshold = blockBytesThreshold;
this.blockCountThreshold = blockCountThreshold;
}

@Override
Expand All @@ -255,6 +285,8 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept
minDocsPerOrdinalForRangeEncoding,
enableOptimizedMerge,
numericBlockShift,
blockBytesThreshold,
blockCountThreshold,
DATA_CODEC,
DATA_EXTENSION,
META_CODEC,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,30 +25,25 @@
public class ES819Version3TSDBDocValuesFormat extends ES819TSDBDocValuesFormat {

static final String CODEC_NAME = "ES8193TSDB";
// Binary doc values block thresholds passed to the parent format when the large binary block option is requested:
// 1024 * 1024 bytes of data or 32768 values per block.
// NOTE(review): these constants hide the inherited ES819TSDBDocValuesFormat.BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT and
// ES819TSDBDocValuesFormat.BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT (128 * 1024 and 1024). Inside this class an
// unqualified reference resolves to the larger values declared here; the parent defaults must be written with the
// ES819TSDBDocValuesFormat. qualifier. Confirm that every unqualified use in this class really intends the large
// thresholds, and consider a distinct name (e.g. a _LARGE suffix) since these are not the format's defaults.
static final int BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT = 1024 * 1024;
static final int BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT = 32768;

public ES819Version3TSDBDocValuesFormat() {
super(
CODEC_NAME,
DEFAULT_SKIP_INDEX_INTERVAL_SIZE,
ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL,
OPTIMIZED_MERGE_ENABLE_DEFAULT,
BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1,
true,
NUMERIC_BLOCK_SHIFT,
DocOffsetsCodec.BITPACKING
);
this(false, false);
}

public ES819Version3TSDBDocValuesFormat(boolean useLargeNumericBlock) {
public ES819Version3TSDBDocValuesFormat(boolean largeNumericBlock, boolean largeBinaryBlock) {
super(
CODEC_NAME,
DEFAULT_SKIP_INDEX_INTERVAL_SIZE,
ORDINAL_RANGE_ENCODING_MIN_DOC_PER_ORDINAL,
OPTIMIZED_MERGE_ENABLE_DEFAULT,
BinaryDVCompressionMode.COMPRESSED_ZSTD_LEVEL_1,
true,
useLargeNumericBlock ? NUMERIC_LARGE_BLOCK_SHIFT : NUMERIC_BLOCK_SHIFT,
DocOffsetsCodec.BITPACKING
largeNumericBlock ? NUMERIC_LARGE_BLOCK_SHIFT : NUMERIC_BLOCK_SHIFT,
DocOffsetsCodec.BITPACKING,
largeBinaryBlock ? BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT : ES819TSDBDocValuesFormat.BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT,
largeBinaryBlock ? BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT : ES819TSDBDocValuesFormat.BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT
);
}

Expand All @@ -68,7 +63,9 @@ public ES819Version3TSDBDocValuesFormat(
binaryDVCompressionMode,
enablePerBlockCompression,
numericBlockShift,
DocOffsetsCodec.BITPACKING
DocOffsetsCodec.BITPACKING,
BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT,
BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,35 @@ public final class TSDBDocValuesFormatFactory {
static final DocValuesFormat ES_819_2_TSDB_DOC_VALUES_FORMAT_LARGE_NUMERIC_BLOCK = ES819TSDBDocValuesFormat.getInstance(true);

static final DocValuesFormat ES_819_3_TSDB_DOC_VALUES_FORMAT = new ES819Version3TSDBDocValuesFormat();
static final DocValuesFormat ES_819_3_TSDB_DOC_VALUES_FORMAT_LARGE_NUMERIC_BLOCK = new ES819Version3TSDBDocValuesFormat(true);
static final DocValuesFormat ES_819_3_TSDB_DOC_VALUES_FORMAT_LARGE_BINARY_BLOCK = new ES819Version3TSDBDocValuesFormat(false, true);
static final DocValuesFormat ES_819_3_TSDB_DOC_VALUES_FORMAT_LARGE_NUMERIC_BLOCK = new ES819Version3TSDBDocValuesFormat(true, false);

private TSDBDocValuesFormatFactory() {}

/**
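* Creates and returns a DocValuesFormat instance based on the specified index version
* and whether to use a large numeric block size or a large binary block size.
* The large binary block size is only honored for indices created on or after
* {@code IndexVersions.TIME_SERIES_DOC_VALUES_FORMAT_VERSION_3}; for older indices that flag is ignored.
* When the large binary block size is selected, the large numeric block size is not applied.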
*
* @param indexCreatedVersion the version of the index being created, which determines
* the applicable DocValuesFormat version.
* @param useLargeBlockSize a boolean flag indicating whether to use a large numeric block size.
* @param indexCreatedVersion the version of the index being created, which determines
* the applicable DocValuesFormat version.
* @param useLargeNumericBlockSize a boolean flag indicating whether to use a large numeric block size.
* @param useLargeBinaryBlockSize a boolean flag indicating whether to use a large binary block size.
* @return the appropriate DocValuesFormat instance based on the index version and block size selection.
*/
public static DocValuesFormat createDocValuesFormat(IndexVersion indexCreatedVersion, boolean useLargeBlockSize) {
public static DocValuesFormat createDocValuesFormat(
IndexVersion indexCreatedVersion,
boolean useLargeNumericBlockSize,
boolean useLargeBinaryBlockSize
) {
if (indexCreatedVersion.onOrAfter(IndexVersions.TIME_SERIES_DOC_VALUES_FORMAT_VERSION_3)) {
return useLargeBlockSize ? ES_819_3_TSDB_DOC_VALUES_FORMAT_LARGE_NUMERIC_BLOCK : ES_819_3_TSDB_DOC_VALUES_FORMAT;
if (useLargeBinaryBlockSize) {
// At this stage, we don't need large numeric blocks if large binary block is requested:
assert useLargeNumericBlockSize == false;
return ES_819_3_TSDB_DOC_VALUES_FORMAT_LARGE_BINARY_BLOCK;
}
return useLargeNumericBlockSize ? ES_819_3_TSDB_DOC_VALUES_FORMAT_LARGE_NUMERIC_BLOCK : ES_819_3_TSDB_DOC_VALUES_FORMAT;
} else {
return useLargeBlockSize ? ES_819_2_TSDB_DOC_VALUES_FORMAT_LARGE_NUMERIC_BLOCK : ES_819_2_TSDB_DOC_VALUES_FORMAT;
return useLargeNumericBlockSize ? ES_819_2_TSDB_DOC_VALUES_FORMAT_LARGE_NUMERIC_BLOCK : ES_819_2_TSDB_DOC_VALUES_FORMAT;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@
import java.util.function.Supplier;
import java.util.stream.IntStream;

import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.BLOCK_BYTES_THRESHOLD;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.BLOCK_COUNT_THRESHOLD;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat.NUMERIC_LARGE_BLOCK_SHIFT;
import static org.elasticsearch.test.ESTestCase.between;
Expand Down Expand Up @@ -143,7 +143,7 @@ public void testBlockWiseBinary() throws Exception {
boolean sparse = randomBoolean();
int numBlocksBound = 10;
// Since the average value size is 25b, the count threshold is hit before the size threshold,
// so use the count threshold to compute the number of docs needed.
int numNonNullValues = randomIntBetween(0, numBlocksBound * BLOCK_COUNT_THRESHOLD);
int numNonNullValues = randomIntBetween(0, numBlocksBound * BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT);

List<String> binaryValues = new ArrayList<>();
int numNonNull = 0;
Expand All @@ -164,7 +164,7 @@ public void testBlockWiseBinary() throws Exception {
public void testBlockWiseBinarySmallValues() throws Exception {
boolean sparse = randomBoolean();
int numBlocksBound = 5;
int numNonNullValues = randomIntBetween(0, numBlocksBound * BLOCK_COUNT_THRESHOLD);
int numNonNullValues = randomIntBetween(0, numBlocksBound * BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT);

List<String> binaryValues = new ArrayList<>();
int numNonNull = 0;
Expand All @@ -186,7 +186,7 @@ public void testBlockWiseBinaryWithEmptySequences() throws Exception {
List<String> binaryValues = new ArrayList<>();
int numSequences = 10;
for (int i = 0; i < numSequences; i++) {
int numInSequence = randomIntBetween(0, 3 * BLOCK_COUNT_THRESHOLD);
int numInSequence = randomIntBetween(0, 3 * BINARY_DV_BLOCK_COUNT_THRESHOLD_DEFAULT);
boolean emptySequence = randomBoolean();
for (int j = 0; j < numInSequence; j++) {
binaryValues.add(emptySequence ? "" : randomAlphaOfLengthBetween(0, 5));
Expand All @@ -198,14 +198,17 @@ public void testBlockWiseBinaryWithEmptySequences() throws Exception {
public void testBlockWiseBinaryLargeValues() throws Exception {
boolean sparse = randomBoolean();
int numBlocksBound = 5;
int binaryDataSize = randomIntBetween(0, numBlocksBound * BLOCK_BYTES_THRESHOLD);
int binaryDataSize = randomIntBetween(0, numBlocksBound * BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT);
List<String> binaryValues = new ArrayList<>();
int totalSize = 0;
while (totalSize < binaryDataSize) {
if (sparse && randomBoolean()) {
binaryValues.add(null);
} else {
final String value = randomAlphaOfLengthBetween(BLOCK_BYTES_THRESHOLD / 2, 2 * BLOCK_BYTES_THRESHOLD);
final String value = randomAlphaOfLengthBetween(
BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT / 2,
2 * BINARY_DV_BLOCK_BYTES_THRESHOLD_DEFAULT
);
binaryValues.add(value);
totalSize += value.length();
}
Expand Down
Loading