Skip to content

Commit 190f1e1

Browse files
authored
Fix synonym phrase query expansion for cross_fields parsing (#28045)
* Fix synonym phrase query expansion for cross_fields parsing. The `cross_fields` mode of the query parser ignores phrase queries generated by multi-word synonyms: in such cases only the first field of each analyzer group is kept. This change fixes the issue by expanding the phrase query for each analyzer group to **all** fields using a disjunction max query.
1 parent 3895add commit 190f1e1

File tree

3 files changed

+110
-3
lines changed

3 files changed

+110
-3
lines changed

server/src/main/java/org/elasticsearch/index/search/MatchQuery.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.lucene.search.BooleanClause.Occur;
3030
import org.apache.lucene.search.BooleanQuery;
3131
import org.apache.lucene.search.BoostQuery;
32+
import org.apache.lucene.search.DisjunctionMaxQuery;
3233
import org.apache.lucene.search.FuzzyQuery;
3334
import org.apache.lucene.search.MultiPhraseQuery;
3435
import org.apache.lucene.search.MultiTermQuery;
@@ -350,7 +351,12 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws
350351
throw exc;
351352
}
352353
}
353-
return super.analyzePhrase(field, stream, slop);
354+
Query query = super.analyzePhrase(field, stream, slop);
355+
if (query instanceof PhraseQuery) {
356+
// synonyms that expand to multiple terms can return a phrase query.
357+
return blendPhraseQuery((PhraseQuery) query, mapper);
358+
}
359+
return query;
354360
}
355361

356362
/**
@@ -472,6 +478,14 @@ private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, Occur highFreqOccu
472478
}
473479
}
474480

481+
/**
482+
* Called when a phrase query is built with {@link QueryBuilder#analyzePhrase(String, TokenStream, int)}.
483+
* Subclass can override this function to blend this query to multiple fields.
484+
*/
485+
protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) {
486+
return query;
487+
}
488+
475489
protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
476490
return new SynonymQuery(terms);
477491
}
@@ -494,5 +508,4 @@ protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
494508
}
495509
return termQuery(fieldType, term.bytes(), lenient);
496510
}
497-
498511
}

server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@
2525
import org.apache.lucene.search.BoostQuery;
2626
import org.apache.lucene.search.DisjunctionMaxQuery;
2727
import org.apache.lucene.search.MatchNoDocsQuery;
28+
import org.apache.lucene.search.PhraseQuery;
2829
import org.apache.lucene.search.Query;
2930
import org.apache.lucene.search.TermQuery;
3031
import org.apache.lucene.util.BytesRef;
31-
import org.elasticsearch.ElasticsearchParseException;
3232
import org.elasticsearch.common.lucene.search.Queries;
3333
import org.elasticsearch.index.mapper.MappedFieldType;
3434
import org.elasticsearch.index.query.AbstractQueryBuilder;
@@ -143,6 +143,10 @@ public Query blendTerms(Term[] terms, MappedFieldType fieldType) {
143143
public Query termQuery(MappedFieldType fieldType, BytesRef value) {
144144
return MultiMatchQuery.this.termQuery(fieldType, value, lenient);
145145
}
146+
147+
public Query blendPhrase(PhraseQuery query, MappedFieldType type) {
148+
return MultiMatchQuery.super.blendPhraseQuery(query, type);
149+
}
146150
}
147151

148152
final class CrossFieldsQueryBuilder extends QueryBuilder {
@@ -226,6 +230,17 @@ public Query termQuery(MappedFieldType fieldType, BytesRef value) {
226230
*/
227231
return blendTerm(new Term(fieldType.name(), value.utf8ToString()), fieldType);
228232
}
233+
234+
@Override
235+
public Query blendPhrase(PhraseQuery query, MappedFieldType type) {
236+
if (blendedFields == null) {
237+
return super.blendPhrase(query, type);
238+
}
239+
/**
240+
* We build phrase queries for multi-word synonyms when {@link QueryBuilder#autoGenerateSynonymsPhraseQuery} is true.
241+
*/
242+
return MultiMatchQuery.blendPhrase(query, blendedFields);
243+
}
229244
}
230245

231246
static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker,
@@ -288,6 +303,28 @@ static Query blendTerms(QueryShardContext context, BytesRef[] values, Float comm
288303
}
289304
}
290305

306+
/**
307+
* Expand a {@link PhraseQuery} to multiple fields that share the same analyzer.
308+
* Returns a {@link DisjunctionMaxQuery} with a disjunction for each expanded field.
309+
*/
310+
static Query blendPhrase(PhraseQuery query, FieldAndFieldType... fields) {
311+
List<Query> disjunctions = new ArrayList<>();
312+
for (FieldAndFieldType field : fields) {
313+
int[] positions = query.getPositions();
314+
Term[] terms = query.getTerms();
315+
PhraseQuery.Builder builder = new PhraseQuery.Builder();
316+
for (int i = 0; i < terms.length; i++) {
317+
builder.add(new Term(field.fieldType.name(), terms[i].bytes()), positions[i]);
318+
}
319+
Query q = builder.build();
320+
if (field.boost != AbstractQueryBuilder.DEFAULT_BOOST) {
321+
q = new BoostQuery(q, field.boost);
322+
}
323+
disjunctions.add(q);
324+
}
325+
return new DisjunctionMaxQuery(disjunctions, 0.0f);
326+
}
327+
291328
@Override
292329
protected Query blendTermQuery(Term term, MappedFieldType fieldType) {
293330
if (queryBuilder == null) {
@@ -304,6 +341,14 @@ protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
304341
return queryBuilder.blendTerms(terms, fieldType);
305342
}
306343

344+
@Override
345+
protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) {
346+
if (queryBuilder == null) {
347+
return super.blendPhraseQuery(query, fieldType);
348+
}
349+
return queryBuilder.blendPhrase(query, fieldType);
350+
}
351+
307352
static final class FieldAndFieldType {
308353
final MappedFieldType fieldType;
309354
final float boost;

server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,16 @@
1919

2020
package org.elasticsearch.index.search;
2121

22+
import org.apache.lucene.analysis.MockSynonymAnalyzer;
2223
import org.apache.lucene.index.Term;
2324
import org.apache.lucene.queries.BlendedTermQuery;
25+
import org.apache.lucene.search.BooleanClause;
26+
import org.apache.lucene.search.BooleanQuery;
2427
import org.apache.lucene.search.BoostQuery;
2528
import org.apache.lucene.search.DisjunctionMaxQuery;
2629
import org.apache.lucene.search.MatchAllDocsQuery;
2730
import org.apache.lucene.search.MatchNoDocsQuery;
31+
import org.apache.lucene.search.PhraseQuery;
2832
import org.apache.lucene.search.Query;
2933
import org.apache.lucene.search.SynonymQuery;
3034
import org.apache.lucene.search.TermQuery;
@@ -43,7 +47,11 @@
4347
import org.junit.Before;
4448

4549
import java.io.IOException;
50+
import java.util.ArrayList;
4651
import java.util.Arrays;
52+
import java.util.HashMap;
53+
import java.util.List;
54+
import java.util.Map;
4755

4856
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
4957
import static org.hamcrest.Matchers.equalTo;
@@ -220,4 +228,45 @@ public void testMultiMatchCrossFieldsWithSynonyms() throws IOException {
220228
assertThat(parsedQuery, equalTo(expectedQuery));
221229

222230
}
231+
232+
public void testMultiMatchCrossFieldsWithSynonymsPhrase() throws IOException {
233+
QueryShardContext queryShardContext = indexService.newQueryShardContext(
234+
randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null);
235+
MultiMatchQuery parser = new MultiMatchQuery(queryShardContext);
236+
parser.setAnalyzer(new MockSynonymAnalyzer());
237+
Map<String, Float> fieldNames = new HashMap<>();
238+
fieldNames.put("name.first", 1.0f);
239+
fieldNames.put("name.last", 1.0f);
240+
Query query = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "guinea pig", null);
241+
242+
Term[] terms = new Term[2];
243+
terms[0] = new Term("name.first", "cavy");
244+
terms[1] = new Term("name.last", "cavy");
245+
float[] boosts = new float[2];
246+
Arrays.fill(boosts, 1.0f);
247+
248+
List<Query> phraseDisjuncts = new ArrayList<>();
249+
phraseDisjuncts.add(
250+
new PhraseQuery.Builder()
251+
.add(new Term("name.first", "guinea"))
252+
.add(new Term("name.first", "pig"))
253+
.build()
254+
);
255+
phraseDisjuncts.add(
256+
new PhraseQuery.Builder()
257+
.add(new Term("name.last", "guinea"))
258+
.add(new Term("name.last", "pig"))
259+
.build()
260+
);
261+
BooleanQuery expected = new BooleanQuery.Builder()
262+
.add(
263+
new BooleanQuery.Builder()
264+
.add(new DisjunctionMaxQuery(phraseDisjuncts, 0.0f), BooleanClause.Occur.SHOULD)
265+
.add(BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f), BooleanClause.Occur.SHOULD)
266+
.build(),
267+
BooleanClause.Occur.SHOULD
268+
)
269+
.build();
270+
assertEquals(expected, query);
271+
}
223272
}

0 commit comments

Comments
 (0)