Skip to content

Commit

Permalink
Always use constant_score query for match_only_text (#16964) (#16969)
Browse files Browse the repository at this point in the history
In some cases, when we create a term query over a `match_only_text`
field, it may still try to compute scores, which prevents early
termination. We should *always* use a constant score query when
querying `match_only_text`, since we don't have the statistics
required to compute scores.

---------

Signed-off-by: Michael Froh <[email protected]>
(cherry picked from commit 0b36599)
  • Loading branch information
msfroh authored Jan 8, 2025
1 parent 8a17b8a commit e5c51e6
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Ensure consistency of system flag on IndexMetadata after diff is applied ([#16644](https://github.com/opensearch-project/OpenSearch/pull/16644))
- Skip remote-repositories validations for node-joins when RepositoriesService is not in sync with cluster-state ([#16763](https://github.com/opensearch-project/OpenSearch/pull/16763))
- Fix _list/shards API failing when closed indices are present ([#16606](https://github.com/opensearch-project/OpenSearch/pull/16606))
- Always use `constant_score` query for `match_only_text` field ([#16964](https://github.com/opensearch-project/OpenSearch/pull/16964))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -290,6 +291,16 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context);
}

@Override
public Query termQuery(Object value, QueryShardContext context) {
return new ConstantScoreQuery(super.termQuery(value, context));
}

@Override
public Query termQueryCaseInsensitive(Object value, QueryShardContext context) {
return new ConstantScoreQuery(super.termQueryCaseInsensitive(value, context));
}

private List<List<Term>> getTermsFromTokenStream(TokenStream stream) throws IOException {
final List<List<Term>> termArray = new ArrayList<>();
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.tests.analysis.MockSynonymAnalyzer;
import org.opensearch.common.lucene.search.AutomatonQueries;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.core.common.Strings;
import org.opensearch.core.xcontent.MediaTypeRegistry;
Expand All @@ -28,6 +30,7 @@
import org.opensearch.index.query.MatchPhraseQueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.query.SourceFieldMatchQuery;
import org.opensearch.index.query.TermQueryBuilder;
import org.opensearch.index.search.MatchQuery;
import org.junit.Before;

Expand Down Expand Up @@ -391,7 +394,7 @@ public void testPhraseQuery() throws IOException {

assertThat(q, is(expectedQuery));
Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext);
assertThat(q4, is(new TermQuery(new Term("field", "singleton"))));
assertThat(q4, is(new ConstantScoreQuery(new TermQuery(new Term("field", "singleton")))));

Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext);
expectedQuery = new SourceFieldMatchQuery(
Expand Down Expand Up @@ -447,4 +450,22 @@ public void testPhraseQuery() throws IOException {
);
assertThat(q6, is(expectedQuery));
}

public void testTermQuery() throws Exception {
MapperService mapperService = createMapperService(mapping(b -> {
b.startObject("field");
{
b.field("type", textFieldName);
b.field("analyzer", "my_stop_analyzer"); // "standard" will be replaced with MockSynonymAnalyzer
}
b.endObject();
}));
QueryShardContext queryShardContext = createQueryShardContext(mapperService);

Query q = new TermQueryBuilder("field", "foo").rewrite(queryShardContext).toQuery(queryShardContext);
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), q);

q = new TermQueryBuilder("field", "foo").caseInsensitive(true).rewrite(queryShardContext).toQuery(queryShardContext);
assertEquals(new ConstantScoreQuery(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "foo"))), q);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@

package org.opensearch.index.mapper;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.TermQuery;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.lucene.search.AutomatonQueries;

public class MatchOnlyTextFieldTypeTests extends TextFieldTypeTests {

Expand All @@ -28,4 +32,18 @@ TextFieldMapper.TextFieldType createFieldType(boolean searchable) {
ParametrizedFieldMapper.Parameter.metaParam().get()
);
}

@Override
public void testTermQuery() {
MappedFieldType ft = createFieldType(true);
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null));
assertEquals(
new ConstantScoreQuery(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo"))),
ft.termQueryCaseInsensitive("fOo", null)
);

MappedFieldType unsearchable = createFieldType(false);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null));
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
}
}

0 comments on commit e5c51e6

Please sign in to comment.