diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 2dc8f3133265a..c6f94206e00c9 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -218,6 +218,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion FALLBACK_TEXT_FIELDS_BINARY_DOC_VALUES_FORMAT_CHECK = def(9_065_0_00, Version.LUCENE_10_3_2); public static final IndexVersion READ_SI_FILES_FROM_MEMORY_FOR_HOLLOW_COMMITS = def(9_066_0_00, Version.LUCENE_10_3_2); public static final IndexVersion FLATTENED_FIELD_TSDB_CODEC_USE_BINARY_DOC_VALUES = def(9_067_0_00, Version.LUCENE_10_3_2); + public static final IndexVersion STORE_PATTERN_TEXT_FIELDS_IN_BINARY_DOC_VALUES = def(9_068_0_00, Version.LUCENE_10_3_2); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/AbstractStringTypeLogsdbRollingUpgradeTestCase.java b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/AbstractStringTypeLogsdbRollingUpgradeTestCase.java index 1dab9a3c39be7..4b3caf7719c49 100644 --- a/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/AbstractStringTypeLogsdbRollingUpgradeTestCase.java +++ b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/AbstractStringTypeLogsdbRollingUpgradeTestCase.java @@ -47,6 +47,7 @@ public AbstractStringTypeLogsdbRollingUpgradeTestCase(String dataStreamName, Str @Before public void createIndex() throws Exception { + checkRequiredFeatures(); LogsdbIndexingRollingUpgradeIT.maybeEnableLogsdbByDefault(); // data stream name should already be reflective of whats being tested, so template id can be random @@ -54,6 +55,14 @@ public void createIndex() throws Exception { 
LogsdbIndexingRollingUpgradeIT.createTemplate(dataStreamName, templateId, template); } + /** + * Override this method to add feature checks that must pass before the test runs. + * Use {@code assumeTrue} to skip the test if required features are not available. + */ + protected void checkRequiredFeatures() throws Exception { + // Default: no additional feature requirements + } + protected List getMessages() { return messages; } diff --git a/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/PatternTextRollingUpgradeIT.java b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/PatternTextRollingUpgradeIT.java new file mode 100644 index 0000000000000..9e22eb9f93599 --- /dev/null +++ b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/PatternTextRollingUpgradeIT.java @@ -0,0 +1,109 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import java.util.Arrays; + +public class PatternTextRollingUpgradeIT extends AbstractStringTypeLogsdbRollingUpgradeTestCase { + + private static final String MIN_VERSION = "gte_v9.2.0"; + private static final String DATA_STREAM_NAME_PREFIX = "logs-pattern-text-bwc-test"; + + private static final String TEMPLATE = """ + { + "mappings": { + "properties": { + "@timestamp" : { + "type": "date" + }, + "length": { + "type": "long" + }, + "factor": { + "type": "double" + }, + "message": { + "type": "pattern_text" + } + } + } + }"""; + + private static final String TEMPLATE_WITH_MULTI_FIELD = """ + { + "mappings": { + "properties": { + "@timestamp" : { + "type": "date" + }, + "length": { + "type": "long" + }, + "factor": { + "type": "double" + }, + "message": { + "type": "pattern_text", + "fields": { + "kwd": { + "type": "keyword" + } + } + } + } + } + }"""; + + private static final String TEMPLATE_WITH_MULTI_FIELD_AND_IGNORE_ABOVE = """ + { + "mappings": { + "properties": { + "@timestamp" : { + "type": "date" + }, + "length": { + "type": "long" + }, + "factor": { + "type": "double" + }, + "message": { + "type": "pattern_text", + "fields": { + "kwd": { + "type": "keyword", + "ignore_above": 50 + } + } + } + } + } + }"""; + + public PatternTextRollingUpgradeIT(String template, String testScenario) { + super(DATA_STREAM_NAME_PREFIX + "." 
+ testScenario, template); + } + + @ParametersFactory + public static Iterable data() { + return Arrays.asList( + new Object[][] { + { TEMPLATE, "basic" }, + { TEMPLATE_WITH_MULTI_FIELD, "with-keyword-multi-field" }, + { TEMPLATE_WITH_MULTI_FIELD_AND_IGNORE_ABOVE, "with-keyword-multi-field-and-ignore-above" } } + ); + } + + @Override + protected void checkRequiredFeatures() { + assumeTrue("pattern_text only available from 9.2.0 onward", oldClusterHasFeature(MIN_VERSION)); + } + +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextCompositeValues.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextCompositeValues.java index 667726241448f..f0cbab9901739 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextCompositeValues.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextCompositeValues.java @@ -20,28 +20,31 @@ import java.util.Set; /** - * If there are values which exceed 32kb, they cannot be stored as doc values - * and must be in a stored field. This class combines the doc values with the - * larges values which are in stored fields. Despite being backed by stored - * fields, this class implements a doc value interface. + * Values which exceed 32kb cannot be stored as sorted set doc values. Such values must be stored separately in binary doc values, which + * do not have length limitations. This class combines the regular doc values with the raw values from binary doc values. 
*/ public final class PatternTextCompositeValues extends BinaryDocValues { + private final LeafStoredFieldLoader storedTemplateLoader; private final String storedMessageFieldName; private final BinaryDocValues patternTextDocValues; private final SortedSetDocValues templateIdDocValues; + private final BinaryDocValues rawTextDocValues; private boolean hasDocValue = false; + private boolean hasRawTextDocValue = false; PatternTextCompositeValues( LeafStoredFieldLoader storedTemplateLoader, String storedMessageFieldName, BinaryDocValues patternTextDocValues, - SortedSetDocValues templateIdDocValues + SortedSetDocValues templateIdDocValues, + BinaryDocValues rawTextDocValues ) { this.storedTemplateLoader = storedTemplateLoader; this.storedMessageFieldName = storedMessageFieldName; this.patternTextDocValues = patternTextDocValues; this.templateIdDocValues = templateIdDocValues; + this.rawTextDocValues = rawTextDocValues; } static PatternTextCompositeValues from(LeafReader leafReader, PatternTextFieldType fieldType) throws IOException { @@ -57,9 +60,25 @@ static PatternTextCompositeValues from(LeafReader leafReader, PatternTextFieldTy fieldType.argsInfoFieldName(), fieldType.useBinaryDocValuesArgs() ); + + // load binary doc values (for newer indices that store raw values in binary doc values) + BinaryDocValues rawBinaryDocValues = leafReader.getBinaryDocValues(fieldType.storedNamed()); + if (rawBinaryDocValues == null) { + // use an empty object here to avoid making null checks later + rawBinaryDocValues = DocValues.emptyBinary(); + } + + // load stored field loader (for older indices that store raw values in stored fields) StoredFieldLoader storedFieldLoader = StoredFieldLoader.create(false, Set.of(fieldType.storedNamed())); LeafStoredFieldLoader storedTemplateLoader = storedFieldLoader.getLoader(leafReader.getContext(), null); - return new PatternTextCompositeValues(storedTemplateLoader, fieldType.storedNamed(), docValues, templateIdDocValues); + + return new 
PatternTextCompositeValues( + storedTemplateLoader, + fieldType.storedNamed(), + docValues, + templateIdDocValues, + rawBinaryDocValues + ); } public BytesRef binaryValue() throws IOException { @@ -67,7 +86,14 @@ public BytesRef binaryValue() throws IOException { return patternTextDocValues.binaryValue(); } - // If there is no doc value, the value was too large and was put in a stored field + // if there is no doc value, then the value was too large to be analyzed or templating was disabled + + // for newer indices, the value is stored in binary doc values + if (hasRawTextDocValue) { + return rawTextDocValues.binaryValue(); + } + + // for older indices, it's stored in stored fields var storedFields = storedTemplateLoader.storedFields(); List storedValues = storedFields.get(storedMessageFieldName); assert storedValues != null && storedValues.size() == 1 && storedValues.getFirst() instanceof BytesRef; @@ -81,7 +107,8 @@ public int docID() { public boolean advanceExact(int i) throws IOException { boolean hasValue = templateIdDocValues.advanceExact(i); hasDocValue = patternTextDocValues.advanceExact(i); - if (hasValue && hasDocValue == false) { + hasRawTextDocValue = rawTextDocValues.advanceExact(i); + if (hasValue && hasDocValue == false && hasRawTextDocValue == false) { storedTemplateLoader.advanceTo(i); } return hasValue; @@ -100,6 +127,6 @@ public int advance(int i) { @Override public long cost() { - return templateIdDocValues.cost() + patternTextDocValues.cost(); + return templateIdDocValues.cost() + patternTextDocValues.cost() + rawTextDocValues.cost(); } } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java index 242117064f348..390ec3648ee43 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java +++ 
b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldMapper.java @@ -23,6 +23,7 @@ import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.BinaryDocValuesSyntheticFieldLoader; import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; @@ -97,17 +98,31 @@ public static class Builder extends TextFamilyBuilder { private final Parameter analyzer; private final Parameter disableTemplating; private final IndexVersion indexCreatedVersion; + private final boolean useBinaryDocValuesForRawText; public Builder(String name, MappingParserContext context) { - this(name, context.indexVersionCreated(), context.getIndexSettings(), context.isWithinMultiField()); + this( + name, + context.indexVersionCreated(), + context.getIndexSettings(), + context.isWithinMultiField(), + useBinaryDocValuesForRawText(context.getIndexSettings()) + ); } - public Builder(String name, IndexVersion indexCreatedVersion, IndexSettings indexSettings, boolean isWithinMultiField) { + public Builder( + String name, + IndexVersion indexCreatedVersion, + IndexSettings indexSettings, + boolean isWithinMultiField, + boolean useBinaryDocValuesForRawText + ) { super(name, indexCreatedVersion, isWithinMultiField); this.indexSettings = indexSettings; this.analyzer = analyzerParam(name, m -> ((PatternTextFieldMapper) m).analyzer); this.disableTemplating = disableTemplatingParameter(indexSettings); this.indexCreatedVersion = indexCreatedVersion; + this.useBinaryDocValuesForRawText = useBinaryDocValuesForRawText; } private boolean useBinaryDocValuesForArgsColumn() { @@ -130,7 +145,8 @@ private PatternTextFieldType buildFieldType(FieldType fieldType, MapperBuilderCo meta.getValue(), context.isSourceSynthetic(), 
isWithinMultiField(), - useBinaryDocValuesForArgsColumn() + useBinaryDocValuesForArgsColumn(), + useBinaryDocValuesForRawText ); } @@ -217,6 +233,7 @@ public PatternTextFieldMapper build(MapperBuilderContext context) { private final FieldType fieldType; private final KeywordFieldMapper templateIdMapper; private final boolean useBinaryDocValueArgs; + private final boolean useBinaryDocValuesForRawText; private PatternTextFieldMapper( String simpleName, @@ -236,6 +253,7 @@ private PatternTextFieldMapper( this.indexOptions = builder.indexOptions.getValue(); this.templateIdMapper = templateIdMapper; this.useBinaryDocValueArgs = builder.useBinaryDocValuesForArgsColumn(); + this.useBinaryDocValuesForRawText = builder.useBinaryDocValuesForRawText; } @Override @@ -245,7 +263,8 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, indexSettings, fieldType().isWithinMultiField()).init(this); + return new Builder(leafName(), indexCreatedVersion, indexSettings, fieldType().isWithinMultiField(), useBinaryDocValuesForRawText) + .init(this); } @Override @@ -275,7 +294,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio context.doc().add(new Field(fieldType().name(), value, fieldType)); if (fieldType().disableTemplating()) { - context.doc().add(new StoredField(fieldType().storedNamed(), new BytesRef(value))); + storePatternAsRawText(context, value); return; } @@ -285,8 +304,8 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio // Add template_id doc_values context.doc().add(templateIdMapper.buildKeywordField(new BytesRef(parts.templateId()))); - if (parts.useStoredField()) { - context.doc().add(new StoredField(fieldType().storedNamed(), new BytesRef(value))); + if (parts.useBinaryDocValuesForRawText()) { + storePatternAsRawText(context, value); } else { // Add template doc_values context.doc().add(new 
SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template()))); @@ -307,6 +326,27 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio } } + /** + * Store the value as a raw text field, without analyzing it. This can happen when templating is disabled or when the value is too long + * to be analyzed. + * + * Values may be stored in binary doc values or in stored fields, both of which don't have the same length limitations as regular doc + * values do. + */ + private void storePatternAsRawText(DocumentParserContext context, final String value) { + if (useBinaryDocValuesForRawText) { + context.doc().add(new BinaryDocValuesField(fieldType().storedNamed(), new BytesRef(value))); + } else { + // for bwc, store in stored fields + context.doc().add(new StoredField(fieldType().storedNamed(), new BytesRef(value))); + } + } + + private static boolean useBinaryDocValuesForRawText(IndexSettings indexSettings) { + return indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.STORE_PATTERN_TEXT_FIELDS_IN_BINARY_DOC_VALUES) + && indexSettings.useTimeSeriesDocValuesFormat(); + } + @Override protected String contentType() { return PatternTextFieldType.CONTENT_TYPE; @@ -329,6 +369,17 @@ protected SyntheticSourceSupport syntheticSourceSupport() { private SourceLoader.SyntheticFieldLoader getSyntheticFieldLoader() { if (fieldType().disableTemplating()) { + if (useBinaryDocValuesForRawText) { + return new BinaryDocValuesSyntheticFieldLoader(fieldType().storedNamed()) { + @Override + protected void writeValue(XContentBuilder b, BytesRef value) throws IOException { + // pattern text fields are not multi-valued, so there is no special encoding here unlike other fields that use + // binary doc values. 
As a result, we don't need to do much and this function remains simple + b.field(leafName(), value.utf8ToString()); + } + }; + } + return new StringStoredFieldFieldLoader(fieldType().storedNamed(), fieldType().name(), leafName()) { @Override protected void write(XContentBuilder b, Object value) throws IOException { diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldType.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldType.java index 3fd8a29b53e72..b91dfad422d30 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldType.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldType.java @@ -36,6 +36,7 @@ import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.mapper.blockloader.docvalues.BytesRefsFromBinaryBlockLoader; import org.elasticsearch.index.mapper.extras.SourceConfirmedTextQuery; import org.elasticsearch.index.mapper.extras.SourceIntervalsSource; import org.elasticsearch.index.query.SearchExecutionContext; @@ -66,6 +67,7 @@ public class PatternTextFieldType extends TextFamilyFieldType { private final boolean hasPositions; private final boolean disableTemplating; private final boolean useBinaryDocValuesArgs; + private final boolean useBinaryDocValuesRawText; PatternTextFieldType( String name, @@ -75,7 +77,8 @@ public class PatternTextFieldType extends TextFamilyFieldType { Map meta, boolean isSyntheticSource, boolean isWithinMultiField, - boolean useBinaryDocValueArgs + boolean useBinaryDocValueArgs, + boolean useBinaryDocValuesRawText ) { // Though this type is based on doc_values, hasDocValues is set to false as the pattern_text type is not aggregatable. 
// This does not stop its child .template type from being aggregatable. @@ -85,6 +88,7 @@ public class PatternTextFieldType extends TextFamilyFieldType { this.hasPositions = tsi.hasPositions(); this.disableTemplating = disableTemplating; this.useBinaryDocValuesArgs = useBinaryDocValueArgs; + this.useBinaryDocValuesRawText = useBinaryDocValuesRawText; } // For testing only @@ -102,7 +106,8 @@ public class PatternTextFieldType extends TextFamilyFieldType { Collections.emptyMap(), syntheticSource, false, - useBinaryDocValueArgs + useBinaryDocValueArgs, + true ); } @@ -150,7 +155,7 @@ private IOFunction, IOExcepti SearchExecutionContext searchExecutionContext ) { if (disableTemplating) { - return storedFieldFetcher(storedNamed()); + return useBinaryDocValuesRawText ? binaryDocValuesFetcher(storedNamed()) : storedFieldFetcher(storedNamed()); } return context -> { @@ -166,6 +171,18 @@ private IOFunction, IOExcepti }; } + private static IOFunction, IOException>> binaryDocValuesFetcher(String name) { + return context -> { + var docValues = context.reader().getBinaryDocValues(name); + return docId -> { + if (docValues != null && docValues.advanceExact(docId)) { + return List.of(docValues.binaryValue()); + } + return List.of(); + }; + }; + } + private static IOFunction, IOException>> storedFieldFetcher(String name) { var loader = StoredFieldLoader.create(false, Set.of(name)); return context -> { @@ -173,7 +190,8 @@ private static IOFunction, IO return docId -> { leafLoader.advanceTo(docId); var storedFields = leafLoader.storedFields(); - return storedFields.get(name); + var values = storedFields.get(name); + return values != null ? 
values : List.of(); }; }; } @@ -309,7 +327,13 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (disableTemplating) { - return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedNamed()); + if (useBinaryDocValuesRawText) { + // for newer indices, raw pattern text values are stored in binary doc values + return new BytesRefsFromBinaryBlockLoader(storedNamed()); + } else { + // for older indices (bwc), raw pattern text values are stored in stored fields + return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedNamed()); + } } return new PatternTextBlockLoader((leafReader -> PatternTextCompositeValues.from(leafReader, this))); diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextValueProcessor.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextValueProcessor.java index 66954daaf940d..fcb6054e35e8d 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextValueProcessor.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextValueProcessor.java @@ -21,7 +21,13 @@ public class PatternTextValueProcessor { private static final Pattern DELIMITER = Pattern.compile("[\\s\\[\\]]"); public static final int MAX_LOG_LEN_TO_STORE_AS_DOC_VALUE = 8 * 1024; - public record Parts(String template, String templateId, List args, List argsInfo, boolean useStoredField) { + public record Parts( + String template, + String templateId, + List args, + List argsInfo, + boolean useBinaryDocValuesForRawText + ) { Parts(String template, List args, List argsInfo) { this(template, PatternTextValueProcessor.templateId(template), args, argsInfo, false); } diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextDocValuesTests.java 
b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextDocValuesTests.java index 1ba4d18e69639..7f9e9e619db4b 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextDocValuesTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextDocValuesTests.java @@ -28,6 +28,7 @@ public class PatternTextDocValuesTests extends ESTestCase { enum Storage { DOC_VALUE, STORED_FIELD, + RAW_DOC_VALUE, EMPTY } @@ -62,13 +63,20 @@ static Message noArg(String message) { static Message empty() { return new Message(Storage.EMPTY, false, null); } + + static Message rawDocValue(String message) { + return new Message(Storage.RAW_DOC_VALUE, false, message); + } } - private static List makeRandomMessages(int numDocs, boolean includeStored) { + /** + * @param includeRawText indicates whether values that will be stored as raw text should be generated + */ + private static List makeRandomMessages(int numDocs, boolean includeRawText) { List messages = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { // if arg is present, it's at the beginning - Storage storage = includeStored ? randomFrom(Storage.values()) : randomFrom(Storage.DOC_VALUE, Storage.EMPTY); + Storage storage = includeRawText ? randomFrom(Storage.values()) : randomFrom(Storage.DOC_VALUE, Storage.EMPTY); String message = randomAlphaOfLength(10) + " " + i; boolean hasArg = storage == Storage.DOC_VALUE && randomBoolean(); messages.add(new Message(storage, hasArg, storage == Storage.EMPTY ? null : message)); @@ -94,7 +102,12 @@ private static BinaryDocValues makeCompositeDocValues(List messages) th String storedFieldName = "message.stored"; var storedValues = messages.stream().map(m -> m.storage == Storage.STORED_FIELD ? 
new BytesRef(m.message) : null).toList(); var storedLoader = new SimpleStoredFieldLoader(storedValues, storedFieldName); - return new PatternTextCompositeValues(storedLoader, storedFieldName, patternTextDocValues, templateId); + var rawDocValues = messages.stream() + .map(m -> m.storage == Storage.RAW_DOC_VALUE ? m.message : null) + .toList() + .toArray(new String[0]); + var rawBinaryDocValues = new SimpleBinaryDocValues(rawDocValues); + return new PatternTextCompositeValues(storedLoader, storedFieldName, patternTextDocValues, templateId, rawBinaryDocValues); } private static BinaryDocValues makeDocValuesDense() throws IOException { @@ -108,12 +121,21 @@ private static BinaryDocValues makeDocValueMissingValues() throws IOException { } private static BinaryDocValues makeCompositeDense() throws IOException { - return makeCompositeDocValues(List.of(Message.stored("1 a"), Message.withArg("2 b"), Message.stored("3 c"), Message.noArg("4 d"))); + return makeCompositeDocValues( + List.of(Message.stored("1 a"), Message.withArg("2 b"), Message.rawDocValue("3 c"), Message.noArg("4 d")) + ); } private static BinaryDocValues makeCompositeMissingValues() throws IOException { return makeCompositeDocValues( - List.of(Message.stored("1 a"), Message.empty(), Message.withArg("3 c"), Message.empty(), Message.noArg("5 e"), Message.empty()) + List.of( + Message.stored("1 a"), + Message.empty(), + Message.rawDocValue("3 c"), + Message.empty(), + Message.noArg("5 e"), + Message.empty() + ) ); } diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldTypeTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldTypeTests.java index 63ef4da3fcc7c..716df8488e1d5 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldTypeTests.java +++ 
b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextFieldTypeTests.java @@ -31,8 +31,11 @@ import org.elasticsearch.common.lucene.search.AutomatonQueries; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.BlockStoredFieldsReader; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.blockloader.docvalues.BytesRefsFromBinaryBlockLoader; import org.elasticsearch.index.mapper.extras.SourceIntervalsSource; import org.hamcrest.Matchers; @@ -217,4 +220,53 @@ public void testRangeIntervals() { ((SourceIntervalsSource) rangeIntervals).getIntervalsSource() ); } + + public void testBlockLoaderWhenTemplatingIsEnabled() { + PatternTextFieldType ft = new PatternTextFieldType("field", hasPositions, syntheticSource, useBinaryDocValueArgs); + BlockLoader blockLoader = ft.blockLoader(null); + assertThat(blockLoader, Matchers.instanceOf(PatternTextBlockLoader.class)); + } + + public void testBlockLoaderWhenTemplatingIsDisabledAndBinaryDocValuesAreUsed() { + PatternTextFieldType ft = new PatternTextFieldType( + "field", + new org.elasticsearch.index.mapper.TextSearchInfo( + hasPositions ? 
PatternTextFieldMapper.Defaults.FIELD_TYPE_POSITIONS : PatternTextFieldMapper.Defaults.FIELD_TYPE_DOCS, + null, + DelimiterAnalyzer.INSTANCE, + DelimiterAnalyzer.INSTANCE + ), + DelimiterAnalyzer.INSTANCE, + true, + Collections.emptyMap(), + syntheticSource, + false, + useBinaryDocValueArgs, + true + ); + BlockLoader blockLoader = ft.blockLoader(null); + assertThat(blockLoader, Matchers.instanceOf(BytesRefsFromBinaryBlockLoader.class)); + } + + public void testBlockLoaderWhenTemplatingIsDisabledAndStoredFieldsAreUsed() { + PatternTextFieldType ft = new PatternTextFieldType( + "field", + new org.elasticsearch.index.mapper.TextSearchInfo( + hasPositions ? PatternTextFieldMapper.Defaults.FIELD_TYPE_POSITIONS : PatternTextFieldMapper.Defaults.FIELD_TYPE_DOCS, + null, + DelimiterAnalyzer.INSTANCE, + DelimiterAnalyzer.INSTANCE + ), + DelimiterAnalyzer.INSTANCE, + true, + Collections.emptyMap(), + syntheticSource, + false, + useBinaryDocValueArgs, + false + ); + BlockLoader blockLoader = ft.blockLoader(null); + assertThat(blockLoader, Matchers.instanceOf(BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader.class)); + } + } diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIntegrationTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIntegrationTests.java index 3d3cd07069790..0f9ccc5450b62 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIntegrationTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patterntext/PatternTextIntegrationTests.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.BinaryDocValues; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; @@ 
-58,12 +59,14 @@ public class PatternTextIntegrationTests extends ESSingleNodeTestCase { private static final Logger logger = LogManager.getLogger(PatternTextIntegrationTests.class); - @ParametersFactory(argumentFormatting = "indexOptions=%s, disableTemplating=%b") + @ParametersFactory(argumentFormatting = "indexOptions=%s, disableTemplating=%b, useBinaryDocValuesForRawText=%s") public static List args() { List args = new ArrayList<>(); for (var indexOption : new String[] { "docs", "positions" }) { for (var templating : new boolean[] { true, false }) { - args.add(new Object[] { indexOption, templating }); + for (var useBinaryDocValues : new boolean[] { true, false }) { + args.add(new Object[] { indexOption, templating, useBinaryDocValues }); + } } } return Collections.unmodifiableList(args); @@ -72,11 +75,13 @@ public static List args() { private final String indexOptions; private final boolean disableTemplating; private final String mapping; + private final boolean useBinaryDocValues; - public PatternTextIntegrationTests(String indexOptions, boolean disableTemplating) { + public PatternTextIntegrationTests(String indexOptions, boolean disableTemplating, boolean useBinaryDocValues) { this.indexOptions = indexOptions; this.disableTemplating = disableTemplating; this.mapping = getMapping(indexOptions, disableTemplating); + this.useBinaryDocValues = useBinaryDocValues; } @Override @@ -107,8 +112,6 @@ protected Collection> getPlugins() { } """; - private static final Settings LOGSDB_SETTING = Settings.builder().put(IndexSettings.MODE.getKey(), "logsdb").build(); - @After public void cleanup() { assertAcked(admin().indices().prepareDelete(INDEX)); @@ -119,8 +122,16 @@ private String getMapping(String indexOptions, boolean disableTemplating) { .replace("%disable_templating%", Boolean.toString(disableTemplating)); } + private Settings getSettings() { + return Settings.builder() + .put(IndexSettings.MODE.getKey(), "logsdb") + // binary doc values are used when time series 
doc values format is enabled + .put(IndexSettings.USE_TIME_SERIES_DOC_VALUES_FORMAT_SETTING.getKey(), useBinaryDocValues) + .build(); + } + public void testSourceMatchAllManyValues() throws IOException { - var createRequest = indicesAdmin().prepareCreate(INDEX).setSettings(LOGSDB_SETTING).setMapping(mapping); + var createRequest = indicesAdmin().prepareCreate(INDEX).setSettings(getSettings()).setMapping(mapping); createIndex(INDEX, createRequest); int numDocs = randomIntBetween(1, 100); @@ -132,7 +143,7 @@ public void testSourceMatchAllManyValues() throws IOException { } public void testLargeValueIsStored() throws IOException { - var createRequest = indicesAdmin().prepareCreate(INDEX).setSettings(LOGSDB_SETTING).setMapping(mapping); + var createRequest = indicesAdmin().prepareCreate(INDEX).setSettings(getSettings()).setMapping(mapping); IndexService indexService = createIndex(INDEX, createRequest); // large message @@ -143,17 +154,23 @@ public void testLargeValueIsStored() throws IOException { assertMappings(); assertMessagesInSource(messages); - // assert contains stored field try (var searcher = indexService.getShard(0).acquireSearcher(INDEX)) { try (var indexReader = searcher.getIndexReader()) { - var document = indexReader.storedFields().document(0); - assertEquals(document.getField("field_pattern_text.stored").binaryValue().utf8ToString(), message); + if (useBinaryDocValues) { + var leafReader = indexReader.leaves().get(0).reader(); + BinaryDocValues docValues = leafReader.getBinaryDocValues("field_pattern_text.stored"); + assertTrue(docValues.advanceExact(0)); + assertEquals(message, docValues.binaryValue().utf8ToString()); + } else { + var document = indexReader.storedFields().document(0); + assertEquals(message, document.getField("field_pattern_text.stored").binaryValue().utf8ToString()); + } } } } public void testSmallValueNotStored() throws IOException { - var createRequest = 
indicesAdmin().prepareCreate(INDEX).setSettings(LOGSDB_SETTING).setMapping(mapping); + var createRequest = indicesAdmin().prepareCreate(INDEX).setSettings(getSettings()).setMapping(mapping); IndexService indexService = createIndex(INDEX, createRequest); // small message @@ -164,13 +181,22 @@ public void testSmallValueNotStored() throws IOException { assertMappings(); assertMessagesInSource(messages); - // assert only contains stored field if templating is disabled try (var searcher = indexService.getShard(0).acquireSearcher(INDEX)) { try (var indexReader = searcher.getIndexReader()) { - var document = indexReader.storedFields().document(0); if (disableTemplating) { - assertEquals(document.getField("field_pattern_text.stored").binaryValue().utf8ToString(), message); + // when templating is disabled, every value is stored raw, in either binary doc values or stored fields + if (useBinaryDocValues) { + var leafReader = indexReader.leaves().get(0).reader(); + BinaryDocValues docValues = leafReader.getBinaryDocValues("field_pattern_text.stored"); + assertTrue(docValues.advanceExact(0)); + assertEquals(message, docValues.binaryValue().utf8ToString()); + } else { + var document = indexReader.storedFields().document(0); + assertEquals(message, document.getField("field_pattern_text.stored").binaryValue().utf8ToString()); + } } else { + // when templating is enabled, small values are templated, not stored + var document = indexReader.storedFields().document(0); assertNull(document.getField("field_pattern_text.stored")); } } @@ -179,7 +205,7 @@ public void testPhraseQuery() throws IOException { var createRequest = new CreateIndexRequest(INDEX).mapping(mapping); - createRequest.settings(LOGSDB_SETTING); + createRequest.settings(getSettings()); assertAcked(admin().indices().create(createRequest)); String smallMessage = "cat dog 123 house mouse"; @@ -199,7 +225,7 @@ public void testQueryResultsSameAsMatchOnlyText() throws 
IOException { var createRequest = new CreateIndexRequest(INDEX).mapping(mapping); if (randomBoolean()) { - createRequest.settings(LOGSDB_SETTING); + createRequest.settings(getSettings()); } assertAcked(admin().indices().create(createRequest));