Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
354624f
Support old postings formats
ywelsch Mar 22, 2022
56d0e7b
javadoc
ywelsch Mar 24, 2022
bbaa535
review comments
ywelsch Mar 28, 2022
31d37af
Merge remote-tracking branch 'elastic/master' into old-postings
ywelsch Mar 28, 2022
eb30319
Merge remote-tracking branch 'elastic/master' into old-postings
ywelsch Apr 21, 2022
b8ffa8d
Merge remote-tracking branch 'elastic/master' into old-postings
ywelsch Apr 26, 2022
0d40083
Verify / rewrite mappings using full analysis service
ywelsch Apr 27, 2022
b7dd421
allow queries on text field type
ywelsch Apr 27, 2022
7f590ad
make analyzer lenient and updateable
ywelsch Apr 27, 2022
8d03678
Merge remote-tracking branch 'elastic/master' into old-postings
ywelsch Apr 27, 2022
c33e835
fix tests
ywelsch Apr 27, 2022
ac09d02
fix test
ywelsch Apr 27, 2022
e99a176
test fixes
ywelsch Apr 27, 2022
c0e508f
use constant scoring
ywelsch Apr 28, 2022
55c462c
Merge remote-tracking branch 'elastic/master' into archive-text-field…
ywelsch Apr 29, 2022
97c49e2
Merge remote-tracking branch 'elastic/master' into archive-text-field…
ywelsch May 2, 2022
d28cea8
revert change
ywelsch May 2, 2022
0415143
tests
ywelsch May 10, 2022
7ef03bd
tests
ywelsch May 10, 2022
9d4b7d0
Merge remote-tracking branch 'elastic/master' into archive-text-field…
ywelsch May 10, 2022
1da6dd1
fix
ywelsch May 10, 2022
948f2a9
remove
ywelsch May 10, 2022
56d391c
no fielddata
ywelsch May 10, 2022
6780c48
Merge remote-tracking branch 'elastic/master' into archive-text-field…
ywelsch May 10, 2022
604d70c
no spans
ywelsch May 10, 2022
817b5a3
disable norms properly
ywelsch May 11, 2022
80419e3
Merge remote-tracking branch 'elastic/master' into archive-text-field…
ywelsch May 11, 2022
1f96dfc
fix existsQuery on text fields
ywelsch May 11, 2022
334efdf
Merge remote-tracking branch 'elastic/master' into archive-text-field…
ywelsch May 11, 2022
3033e12
Merge remote-tracking branch 'elastic/master' into archive-text-field…
ywelsch May 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> ((MatchOnlyTextFieldMapper) m).indexAnalyzer,
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
indexCreatedVersion
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
Expand Down Expand Up @@ -92,7 +93,7 @@ public static class Defaults {
public static final int MAX_SHINGLE_SIZE = 3;
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));

private static Builder builder(FieldMapper in) {
return ((SearchAsYouTypeFieldMapper) in).builder;
Expand Down Expand Up @@ -141,12 +142,16 @@ public static class Builder extends FieldMapper.Builder {

private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name, IndexAnalyzers indexAnalyzers) {
private final Version indexCreatedVersion;

public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> builder(m).analyzers.getIndexAnalyzer(),
m -> builder(m).analyzers.positionIncrementGap.getValue()
m -> builder(m).analyzers.positionIncrementGap.getValue(),
indexCreatedVersion
);
}

Expand Down Expand Up @@ -702,7 +707,7 @@ protected String contentType() {
}

public FieldMapper.Builder getMergeBuilder() {
return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this);
return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
}

public static String getShingleFieldName(String parentField, int shingleSize) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
Expand Down Expand Up @@ -86,12 +87,16 @@ public static class Builder extends FieldMapper.Builder {

private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name, IndexAnalyzers indexAnalyzers) {
private final Version indexCreatedVersion;

public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> builder(m).analyzers.getIndexAnalyzer(),
m -> builder(m).analyzers.positionIncrementGap.getValue()
m -> builder(m).analyzers.positionIncrementGap.getValue(),
indexCreatedVersion
);
}

Expand Down Expand Up @@ -145,7 +150,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
}
}

public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));

/**
* Parses markdown-like syntax into plain text and AnnotationTokens with offsets for
Expand Down Expand Up @@ -519,6 +524,6 @@ protected String contentType() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this);
return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperBuilderContext;
Expand All @@ -28,7 +29,7 @@ public void testIntervals() throws IOException {
}

public void testFetchSourceValue() throws IOException {
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", createDefaultIndexAnalyzers()).build(
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", Version.CURRENT, createDefaultIndexAnalyzers()).build(
MapperBuilderContext.ROOT
).fieldType();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,12 @@
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MapperRegistry;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.Mapping;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.script.ScriptCompiler;
import org.elasticsearch.script.ScriptService;
Expand Down Expand Up @@ -92,7 +89,7 @@ public IndexMetadata verifyIndexMetadata(IndexMetadata indexMetadata, Version mi
// Next we have to run this otherwise if we try to create IndexSettings
// with broken settings it would fail in checkMappingsCompatibility
newMetadata = archiveBrokenIndexSettings(newMetadata);
createAndValidateMapping(newMetadata);
checkMappingsCompatibility(newMetadata);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes in this class revert the changes made in #85059, as we now validate against a MapperService with all analyzers configured when restoring the legacy index in RestoreService. The reason for doing it this way is that it not only provides better error messages on restore, but also handles a tricky situation where the Mapping returned by the methods here would have its analyzer settings misconfigured, because checkMappingsCompatibility does not create a proper environment with actual analyzers configured.

return newMetadata;
}

Expand Down Expand Up @@ -129,10 +126,8 @@ private static void checkSupportedVersion(IndexMetadata indexMetadata, Version m
* Note that we don't expect users to encounter mapping incompatibilities, since our index compatibility
* policy guarantees we can read mappings from previous compatible index versions. A failure here would
* indicate a compatibility bug (which is unfortunately not that uncommon).
* @return the mapping
*/
@Nullable
public Mapping createAndValidateMapping(IndexMetadata indexMetadata) {
private void checkMappingsCompatibility(IndexMetadata indexMetadata) {
try {

// We cannot instantiate real analysis server or similarity service at this point because the node
Expand Down Expand Up @@ -199,8 +194,6 @@ public Set<Entry<String, NamedAnalyzer>> entrySet() {
scriptService
);
mapperService.merge(indexMetadata, MapperService.MergeReason.MAPPING_RECOVERY);
DocumentMapper documentMapper = mapperService.documentMapper();
return documentMapper == null ? null : documentMapper.mapping();
}
} catch (Exception ex) {
// Wrap the inner exception so we have the index name in the exception message
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

package org.elasticsearch.index.mapper;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
Expand Down Expand Up @@ -48,6 +51,8 @@
import java.util.function.Supplier;

public abstract class FieldMapper extends Mapper implements Cloneable {
private static final Logger logger = LogManager.getLogger(FieldMapper.class);

public static final Setting<Boolean> IGNORE_MALFORMED_SETTING = Setting.boolSetting(
"index.mapping.ignore_malformed",
false,
Expand Down Expand Up @@ -1042,23 +1047,48 @@ public static <T extends Enum<T>> Parameter<T> restrictedEnumParam(
* @param updateable whether the parameter can be changed by a mapping update
* @param initializer a function that reads the parameter value from an existing mapper
* @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping
* @param indexCreatedVersion the version on which this index was created
*/
public static Parameter<NamedAnalyzer> analyzerParam(
String name,
boolean updateable,
Function<FieldMapper, NamedAnalyzer> initializer,
Supplier<NamedAnalyzer> defaultAnalyzer
Supplier<NamedAnalyzer> defaultAnalyzer,
Version indexCreatedVersion
) {
return new Parameter<>(name, updateable, defaultAnalyzer, (n, c, o) -> {
String analyzerName = o.toString();
NamedAnalyzer a = c.getIndexAnalyzers().get(analyzerName);
if (a == null) {
throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings");
if (indexCreatedVersion.isLegacyIndexVersion()) {
logger.warn(
new ParameterizedMessage("Could not find analyzer [{}] of legacy index, falling back to default", analyzerName)
);
a = defaultAnalyzer.get();
} else {
throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings");
}
}
return a;
}, initializer, (b, n, v) -> b.field(n, v.name()), NamedAnalyzer::name);
}

/**
 * Convenience overload that defines an analyzer-name parameter without an explicit
 * index-created version; {@link Version#CURRENT} is passed to the five-argument overload.
 * @param name            the parameter name
 * @param updateable      whether the parameter can be changed by a mapping update
 * @param initializer     a function that reads the parameter value from an existing mapper
 * @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping
 */
public static Parameter<NamedAnalyzer> analyzerParam(
    String name,
    boolean updateable,
    Function<FieldMapper, NamedAnalyzer> initializer,
    Supplier<NamedAnalyzer> defaultAnalyzer
) {
    // Callers that do not track the index-created version are treated as current-version indices
    final Version assumedIndexVersion = Version.CURRENT;
    return analyzerParam(name, updateable, initializer, defaultAnalyzer, assumedIndexVersion);
}

/**
* Declares a metadata parameter
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
Expand Down Expand Up @@ -278,7 +279,8 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> ((TextFieldMapper) m).indexAnalyzer,
m -> (((TextFieldMapper) m).positionIncrementGap)
m -> (((TextFieldMapper) m).positionIncrementGap),
indexCreatedVersion
);
}

Expand Down Expand Up @@ -329,7 +331,7 @@ protected List<Parameter<?>> getParameters() {
);
}

private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context) {
private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext context, Version indexCreatedVersion) {
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
if (analyzers.positionIncrementGap.isConfigured()) {
Expand All @@ -340,7 +342,12 @@ private TextFieldType buildFieldType(FieldType fieldType, MapperBuilderContext c
}
}
TextSearchInfo tsi = new TextSearchInfo(fieldType, similarity.getValue(), searchAnalyzer, searchQuoteAnalyzer);
TextFieldType ft = new TextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue());
TextFieldType ft;
if (indexCreatedVersion.isLegacyIndexVersion()) {
ft = new ConstantScoreTextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue());
} else {
ft = new TextFieldType(context.buildFullName(name), index.getValue(), store.getValue(), tsi, meta.getValue());
}
ft.eagerGlobalOrdinals = eagerGlobalOrdinals.getValue();
if (fieldData.getValue()) {
ft.setFielddata(true, freqFilter.getValue());
Expand Down Expand Up @@ -430,7 +437,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers(String name, SubFieldInfo phras
@Override
public TextFieldMapper build(MapperBuilderContext context) {
FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors);
TextFieldType tft = buildFieldType(fieldType, context);
TextFieldType tft = buildFieldType(fieldType, context, indexCreatedVersion);
SubFieldInfo phraseFieldInfo = buildPhraseInfo(fieldType, tft);
SubFieldInfo prefixFieldInfo = buildPrefixInfo(context, fieldType, tft);
MultiFields multiFields = multiFieldsBuilder.build(this, context);
Expand All @@ -453,7 +460,12 @@ public TextFieldMapper build(MapperBuilderContext context) {
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
private static final Version MINIMUM_COMPATIBILITY_VERSION = Version.fromString("5.0.0");

public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()),
MINIMUM_COMPATIBILITY_VERSION
);

private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {

Expand Down Expand Up @@ -897,6 +909,78 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S

}

public static class ConstantScoreTextFieldType extends TextFieldType {

/**
 * Creates a constant-score text field type by forwarding every argument unchanged to the
 * {@link TextFieldType} constructor.
 * @param name    the field name
 * @param indexed passed through as the parent's indexed flag
 * @param stored  passed through as the parent's stored flag
 * @param tsi     the text search info handed to the parent
 * @param meta    the field metadata handed to the parent
 */
public ConstantScoreTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map<String, String> meta) {
super(name, indexed, stored, tsi, meta);
}

/**
 * Creates a constant-score text field type with default settings: indexed, not stored,
 * empty metadata, and a {@link TextSearchInfo} built from {@code Defaults.FIELD_TYPE},
 * a {@code null} similarity and {@code Lucene.STANDARD_ANALYZER} for both analyzer arguments.
 * @param name the field name
 */
public ConstantScoreTextFieldType(String name) {
this(
name,
true,
false,
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
Collections.emptyMap()
);
}

/**
 * Creates a constant-score text field type with caller-supplied indexed/stored flags and
 * metadata, and a {@link TextSearchInfo} built from {@code Defaults.FIELD_TYPE},
 * a {@code null} similarity and {@code Lucene.STANDARD_ANALYZER} for both analyzer arguments.
 * @param name    the field name
 * @param indexed passed through as the indexed flag
 * @param stored  passed through as the stored flag
 * @param meta    the field metadata
 */
public ConstantScoreTextFieldType(String name, boolean indexed, boolean stored, Map<String, String> meta) {
this(
name,
indexed,
stored,
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
meta
);
}

/**
 * Builds the term query through the parent field type and wraps it in a
 * {@link ConstantScoreQuery} so that matches are not relevance-scored.
 */
@Override
public Query termQuery(Object value, SearchExecutionContext context) {
    final Query scoringQuery = super.termQuery(value, context);
    // The wrapper disables scoring of the delegate query
    return new ConstantScoreQuery(scoringQuery);
}

/**
 * Builds the fuzzy query through the parent field type and wraps it in a
 * {@link ConstantScoreQuery} so that matches are not relevance-scored.
 */
@Override
public Query fuzzyQuery(
    Object value,
    Fuzziness fuzziness,
    int prefixLength,
    int maxExpansions,
    boolean transpositions,
    SearchExecutionContext context
) {
    final Query scoringQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context);
    // The wrapper disables scoring of the delegate query
    return new ConstantScoreQuery(scoringQuery);
}

/**
 * Builds the phrase query through the parent field type and wraps it in a
 * {@link ConstantScoreQuery} so that matches are not relevance-scored.
 * @throws IOException if the parent implementation fails while building the query
 */
@Override
public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext)
    throws IOException {
    final Query scoringQuery = super.phraseQuery(stream, slop, enablePosIncrements, queryShardContext);
    // The wrapper disables scoring of the delegate query
    return new ConstantScoreQuery(scoringQuery);
}

/**
 * Builds the multi-phrase query through the parent field type and wraps it in a
 * {@link ConstantScoreQuery} so that matches are not relevance-scored.
 * @throws IOException if the parent implementation fails while building the query
 */
@Override
public Query multiPhraseQuery(
    TokenStream stream,
    int slop,
    boolean enablePositionIncrements,
    SearchExecutionContext queryShardContext
) throws IOException {
    final Query scoringQuery = super.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext);
    // The wrapper disables scoring of the delegate query
    return new ConstantScoreQuery(scoringQuery);
}

/**
 * Builds the phrase-prefix query through the parent field type and wraps it in a
 * {@link ConstantScoreQuery} so that matches are not relevance-scored.
 * @throws IOException if the parent implementation fails while building the query
 */
@Override
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext)
    throws IOException {
    final Query scoringQuery = super.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext);
    // The wrapper disables scoring of the delegate query
    return new ConstantScoreQuery(scoringQuery);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Span prefix queries will do something weird here, won't they? But then span prefix queries are kind of weird in any case. I think they're enough of an edge case that we can override spanPrefixQuery() and throw an exception saying we don't support them on legacy indexes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that we don't need to support them. I've pushed 604d70c but I'm not sure how to add a test for it (I couldn't find existing tests that exercise this method).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks as though the two places it would be used are 1) a prefix query wrapped in a span multiterm query, which can be replaced by an interval query; and 2) a match_phrase_prefix query that uses multiterm synonyms, which will get factored away when we rework things to use QueryBuilders properly. So I think we can happily just throw an exception here and not worry about it further :)

}

private final Version indexCreatedVersion;
private final boolean index;
private final boolean store;
Expand Down
Loading