From c6642557783522cfef503c5b36df341bf3a3a36e Mon Sep 17 00:00:00 2001 From: Harrison Engel Date: Mon, 14 Jul 2025 19:53:19 +0000 Subject: [PATCH 1/4] Add a combined_fields query to utilize Lucene CombinedField and get BM25F ranking. Signed-off-by: Harrison Engel --- .../test/search/400_combined_fields.yml | 91 +++ .../query/CombinedFieldsQueryBuilder.java | 383 +++++++++++++ .../opensearch/index/query/QueryBuilders.java | 11 + .../org/opensearch/search/SearchModule.java | 4 + .../CombinedFieldsQueryBuilderTests.java | 516 ++++++++++++++++++ .../opensearch/search/SearchModuleTests.java | 1 + 6 files changed, 1006 insertions(+) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/400_combined_fields.yml create mode 100644 server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java create mode 100644 server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/400_combined_fields.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/400_combined_fields.yml new file mode 100644 index 0000000000000..e54f7fc9c12bb --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/400_combined_fields.yml @@ -0,0 +1,91 @@ +setup: + - do: + indices.create: + index: cf_test + body: + mappings: + properties: + headline: + type: text + summary: + type: text + content: + type: text + + - do: + index: + index: cf_test + id: 1 + body: + headline: "Graph Theory and Edge Computing Converge" + summary: "Bridging the worlds of graph algorithms and distributed edge devices at scale." + content: "Industry experts explain how graph data structures enhance edge computing frameworks for real-time analytics at the network edge, illustrating graph traversal techniques executed close to edge devices." + - do: + index: + index: cf_test + id: 2 + body: + headline: "Edge Computing in the Wild: Deployments Across Continents" + summary: "Scaling real-time applications by processing data at the edge." + content: | + Edge gateways, edge devices, and edge sensors form a layered edge architecture that keeps data close to where it is generated. By processing requests at the edge, organisations avoid costly round-trips to central clouds, and edge latency drops dramatically. Multiple independent edge sites synchronise using lightweight protocols. A small knowledge graph records device relationships, but the spotlight stays firmly on the edge environment and its growing ecosystem of edge workloads. + - do: + index: + index: cf_test + id: 3 + body: + headline: "Graph Analytics Today: Algorithms Behind Recommendations" + summary: "From social networks to logistics, graph structures unlock hidden insights." + content: | + Graph neural networks, graph embeddings, and large-scale graph processing illustrate the growing importance of graph analytics. Modern graph engines accelerate graph traversal, community detection, and path search to deliver recommendations in milliseconds. Engineers seldom push these compute-heavy jobs to the edge, choosing instead to optimise in-memory graph stores and indexing strategies. The article closes with a brief reference to how edge devices may someday stream data directly into graph pipelines. + - do: + index: + index: cf_test + id: 4 + body: + headline: "Graph Theory Fundamentals: Understanding Network Structures" + summary: "Core concepts in graph theory and their applications in modern computing." + content: | + Graph theory provides the mathematical foundation for understanding complex network structures. From social networks to transportation systems, graphs model relationships between entities. Graph algorithms like depth-first search, breadth-first search, and Dijkstra's shortest path algorithm are fundamental tools in computer science. Graph databases store and query these relationships efficiently, enabling powerful analytics on interconnected data. + - do: + indices.refresh: + index: cf_test + +--- +"Combined_fields query uses BM25F ranking across weighted fields": + - skip: + version: " - 3.1.99" + reason: The combined_fields query is available in 3.2 and later. + - do: + search: + index: cf_test + body: + query: + combined_fields: + query: "graph edge" + fields: ["headline^3", "summary^2", "content"] + operator: "and" + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "1" } + - gt: { hits.hits.2._score: 0.0 } + +--- +"Combined_fields query with OR operator favors high-frequency single-term docs": + - skip: + version: " - 3.1.99" + reason: The combined_fields query is available in 3.2 and later. + - do: + search: + index: cf_test + body: + query: + combined_fields: + query: "graph edge" + fields: ["headline", "summary^50", "content"] + operator: "or" + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "1" } + - gt: { hits.hits.0._score: 0.0 } + - gt: { hits.hits.1._score: 0.0 } + - gt: { hits.hits.2._score: 0.0 } + - gt: { hits.hits.3._score: 0.0 } diff --git a/server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java new file mode 100644 index 0000000000000..a6046da10aba0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java @@ -0,0 +1,383 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CombinedFieldQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.QueryBuilder; +import org.opensearch.common.lucene.search.Queries; +import org.opensearch.core.ParseField; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.ConstructingObjectParser; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.TextFieldMapper; +import org.opensearch.index.search.QueryParserHelper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +/** + * Combined fields query that treats multiple fields as a + * single stream and scores terms as if you had indexed them + * as a single term in a single field. + * + * @opensearch.experimental + */ +public class CombinedFieldsQueryBuilder extends AbstractQueryBuilder { + + public static final String NAME = "combined_fields"; + + private static final ParseField QUERY_FIELD = new ParseField("query"); + private static final ParseField FIELDS_FIELD = new ParseField("fields"); + private static final ParseField OPERATOR_FIELD = new ParseField("operator"); + + private Object queryValue; + private Map fieldToWeight; + private Operator operator = Operator.OR; + + /** + * Constructor for CombinedFieldsQueryBuilder. + * @param value + * @param fields + */ + public CombinedFieldsQueryBuilder(Object value, String... fields) { + if (value == null) { + throw new IllegalArgumentException(String.format(Locale.ROOT, "[%s] requires query value", NAME)); + } + if (fields == null) { + throw new IllegalArgumentException(String.format(Locale.ROOT, "[%s] requires field list", NAME)); + } + this.queryValue = value; + this.fieldToWeight = QueryParserHelper.parseFieldsAndWeights(Arrays.asList(fields)); + } + + /* + * Create a CombinedFieldsQueryBuilder from a StreamInput. + * Used in conjuction with doWriteTo. + * Wire format: + * - base query properties (set byte size by AbstractQueryBuilder base class) + * - query value object (variable size) + * - numFields of fieldToWeight map (var int) + * - repeated field name and weight pairs (string, float) + * - operator (enum) + * @param in + * @throws IOException + */ + public CombinedFieldsQueryBuilder(StreamInput in) throws IOException { + // First, read in the base query properties (boost, queryName, etc.) + super(in); + + this.queryValue = in.readGenericValue(); + + int numFields = in.readVInt(); + this.fieldToWeight = new HashMap<>(); + for (int i = 0; i < numFields; i++) { + String field = in.readString(); + float weight = in.readFloat(); + this.fieldToWeight.put(field, weight); + } + + this.operator = Operator.readFromStream(in); + } + + /* + * doWriteTo writes the query to a StreamOutput. + * Used in conjuction with the constructor that takes a StreamInput. + * Wire format: + * - base query properties (set byte size by AbstractQueryBuilder base class) + * - query value object (variable size) + * - numFields of fieldToWeight map (var int) + * - repeated field name and weight pairs (string, float) + * - operator (enum) + */ + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeGenericValue(queryValue); + + out.writeVInt(fieldToWeight.size()); + for (Map.Entry fieldEntry : fieldToWeight.entrySet()) { + out.writeString(fieldEntry.getKey()); + out.writeFloat(fieldEntry.getValue()); + } + + this.operator.writeTo(out); + } + + // ======================== + // FIELD ACCESSORS + // ======================== + + /** + * Returns the query value to be analyzed and matched. + */ + public Object queryValue() { + return this.queryValue; + } + + /** + * Returns the map of field names to their boost values. + */ + public Map fieldToWeight() { + return this.fieldToWeight; + } + + /** + * Returns the operator used to combine terms. + */ + public Operator operator() { + return this.operator; + } + + /** + * Sets the operator used to combine terms (AND or OR). + * + * @param operator the operator to use + * @return this query builder for method chaining + */ + public CombinedFieldsQueryBuilder operator(Operator operator) { + this.operator = operator; + return this; + } + + // ======================== + // XCONTENT SERIALIZATION + // ======================== + + @Override + public void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.field(QUERY_FIELD.getPreferredName(), queryValue); + builder.startArray(FIELDS_FIELD.getPreferredName()); + for (Map.Entry fieldEntry : this.fieldToWeight.entrySet()) { + builder.value(String.format(Locale.ROOT, "%s^%s", fieldEntry.getKey(), fieldEntry.getValue())); + } + builder.endArray(); + builder.field(OPERATOR_FIELD.getPreferredName(), operator.toString()); + printBoostAndQueryName(builder); + builder.endObject(); + } + + // ======================== + // XContent PARSING + // ======================== + + private static final ConstructingObjectParser XCONTENT_PARSER = new ConstructingObjectParser<>( + NAME, + a -> new CombinedFieldsQueryBuilder(a[0]) + ); + + static { + XCONTENT_PARSER.declareString(ConstructingObjectParser.constructorArg(), QUERY_FIELD); + + XCONTENT_PARSER.declareStringArray((builder, values) -> { + Map fieldToWeight = QueryParserHelper.parseFieldsAndWeights(values); + builder.fieldToWeight = fieldToWeight; + }, FIELDS_FIELD); + + XCONTENT_PARSER.declareString(CombinedFieldsQueryBuilder::operator, Operator::fromString, OPERATOR_FIELD); + + XCONTENT_PARSER.declareFloat(CombinedFieldsQueryBuilder::boost, BOOST_FIELD); + XCONTENT_PARSER.declareString(CombinedFieldsQueryBuilder::queryName, NAME_FIELD); + } + + public static CombinedFieldsQueryBuilder fromXContent(XContentParser parser) throws IOException { + return XCONTENT_PARSER.parse(parser, null); + } + + // ======================== + // ABSTRACT METHOD IMPLEMENTATIONS + // ======================== + + @Override + public String getWriteableName() { + return NAME; + } + + // ======================== + // CORE QUERY BUILDING LOGIC + // ======================== + + @Override + protected Query doToQuery(QueryShardContext context) throws IOException { + + Map mappedFields = QueryParserHelper.resolveMappingFields(context, fieldToWeight); + + if (mappedFields.isEmpty()) { + return Queries.newUnmappedFieldsQuery(fieldToWeight.keySet()); + } + + List fieldTypes = extractAndValidateMappedFieldTypes(context, mappedFields); + Analyzer sharedAnalyzer = validateAndGetSharedAnalyzer(fieldTypes); + + return buildQuery(mappedFields, sharedAnalyzer); + } + + // ======================== + // HELPER METHODS + // ======================== + + /** + * Builds the list of mapped fields. + */ + private List extractAndValidateMappedFieldTypes(QueryShardContext context, Map mappedFields) { + List fields = new ArrayList<>(); + + for (Map.Entry entry : mappedFields.entrySet()) { + String name = entry.getKey(); + MappedFieldType fieldType = context.getFieldType(name); + if (fieldType == null) { + continue; + } + + validateFieldType(fieldType); + fields.add(fieldType); + } + + return fields; + } + + /** + * Validates that the field type is supported by combined fields queries. + */ + private void validateFieldType(MappedFieldType fieldType) { + if (fieldType.familyTypeName().equals(TextFieldMapper.CONTENT_TYPE) == false) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "Field [%s] of type [%s] does not support [%s] queries", + fieldType.name(), + fieldType.typeName(), + NAME + ) + ); + } + } + + /** + * Validates that all fields use the same analyzer and returns the shared analyzer. + * This is a Lucene requirement for CombinedFieldQuery. + */ + private Analyzer validateAndGetSharedAnalyzer(List fieldTypes) { + Analyzer sharedAnalyzer = null; + + for (MappedFieldType fieldType : fieldTypes) { + Analyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); + if (sharedAnalyzer != null && analyzer.equals(sharedAnalyzer) == false) { + throw new IllegalArgumentException( + String.format(Locale.ROOT, "All fields in [%s] query must have the same search analyzer", NAME) + ); + } + sharedAnalyzer = analyzer; + } + + return sharedAnalyzer; + } + + /** + * Builds the final query using the prepared fields and analyzer. + */ + private Query buildQuery(Map mappedFieldToWeight, Analyzer sharedAnalyzer) { + assert mappedFieldToWeight.isEmpty() == false; + + String placeholderFieldName = mappedFieldToWeight.keySet().iterator().next(); + Builder builder = new Builder(mappedFieldToWeight, sharedAnalyzer, this.operator); + Query query = builder.createBooleanQuery(placeholderFieldName, this.queryValue.toString(), this.operator.toBooleanClauseOccur()); + + return query == null ? createZeroTermsQuery() : query; + } + + private Query createZeroTermsQuery() { + return Queries.newMatchNoDocsQuery("Matching no documents because no terms present"); + } + + // ======================== + // INNER CLASS + // ======================== + + /** + * Internal builder class that extends Lucene's QueryBuilder to create + * combined field queries using the BM25F ranking algorithm. + */ + private static class Builder extends QueryBuilder { + private final Map mappedFieldToWeight; + private final Operator operator; + + Builder(Map mappedFieldToWeight, Analyzer analyzer, Operator operator) { + super(analyzer); + this.mappedFieldToWeight = mappedFieldToWeight; + this.operator = operator; + } + + @Override + public Query createPhraseQuery(String field, String queryText, int phraseSlop) { + throw new IllegalArgumentException("[combined_fields] queries don't support phrases"); + } + + @Override + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + throw new IllegalArgumentException("[combined_fields] queries don't support phrases"); + } + + @Override + protected Query newSynonymQuery(String fieldPlaceholder, TermAndBoost[] terms) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + BooleanClause.Occur occur = this.operator.toBooleanClauseOccur(); + + for (TermAndBoost termAndBoost : terms) { + BytesRef term = termAndBoost.term(); + + CombinedFieldQuery.Builder combinedFieldBuilder = new CombinedFieldQuery.Builder(term); + for (Map.Entry entry : mappedFieldToWeight.entrySet()) { + combinedFieldBuilder.addField(entry.getKey(), entry.getValue()); + } + + builder.add(combinedFieldBuilder.build(), occur); + } + return builder.build(); + } + + @Override + protected Query newTermQuery(Term term, float boost) { + QueryBuilder.TermAndBoost termAndBoost = new QueryBuilder.TermAndBoost(term.bytes(), boost); + return newSynonymQuery("", new QueryBuilder.TermAndBoost[] { termAndBoost }); + } + } + + // ======================== + // STANDARD QUERY BUILDER METHODS + // ======================== + + @Override + protected int doHashCode() { + return Objects.hash(queryValue, fieldToWeight, operator); + } + + @Override + protected boolean doEquals(CombinedFieldsQueryBuilder other) { + return Objects.equals(queryValue, other.queryValue) + && Objects.equals(fieldToWeight, other.fieldToWeight) + && Objects.equals(operator, other.operator); + } + +} diff --git a/server/src/main/java/org/opensearch/index/query/QueryBuilders.java b/server/src/main/java/org/opensearch/index/query/QueryBuilders.java index 1debba73136b2..db1f48cafcf32 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryBuilders.java +++ b/server/src/main/java/org/opensearch/index/query/QueryBuilders.java @@ -111,6 +111,17 @@ public static MatchBoolPrefixQueryBuilder matchBoolPrefixQuery(String name, Obje return new MatchBoolPrefixQueryBuilder(name, text); } + /** + * Creates a combined_fields query for the provided value across the given field names. + * + * @param value The query value (to be analyzed). + * @param fields The target field names. + * @return a {@link org.opensearch.index.query.CombinedFieldsQueryBuilder} instance. + */ + public static org.opensearch.index.query.CombinedFieldsQueryBuilder combinedFieldsQuery(Object value, String... fields) { + return new org.opensearch.index.query.CombinedFieldsQueryBuilder(value, fields); + } + /** * Creates a text query with type "PHRASE" for the provided field name and text. * diff --git a/server/src/main/java/org/opensearch/search/SearchModule.java b/server/src/main/java/org/opensearch/search/SearchModule.java index cee66b734e76b..21e0e08f363d8 100644 --- a/server/src/main/java/org/opensearch/search/SearchModule.java +++ b/server/src/main/java/org/opensearch/search/SearchModule.java @@ -47,6 +47,7 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.BoostingQueryBuilder; +import org.opensearch.index.query.CombinedFieldsQueryBuilder; import org.opensearch.index.query.CommonTermsQueryBuilder; import org.opensearch.index.query.ConstantScoreQueryBuilder; import org.opensearch.index.query.DisMaxQueryBuilder; @@ -1118,6 +1119,9 @@ private void registerQueryParsers(List plugins) { registerQuery(new QuerySpec<>(RangeQueryBuilder.NAME, RangeQueryBuilder::new, RangeQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(PrefixQueryBuilder.NAME, PrefixQueryBuilder::new, PrefixQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(WildcardQueryBuilder.NAME, WildcardQueryBuilder::new, WildcardQueryBuilder::fromXContent)); + registerQuery( + new QuerySpec<>(CombinedFieldsQueryBuilder.NAME, CombinedFieldsQueryBuilder::new, CombinedFieldsQueryBuilder::fromXContent) + ); registerQuery( new QuerySpec<>(ConstantScoreQueryBuilder.NAME, ConstantScoreQueryBuilder::new, ConstantScoreQueryBuilder::fromXContent) ); diff --git a/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java new file mode 100644 index 0000000000000..19bda4dafb877 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java @@ -0,0 +1,516 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CombinedFieldQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.Directory; +import org.opensearch.action.admin.indices.mapping.put.PutMappingRequest; +import org.opensearch.common.compress.CompressedXContent; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.test.AbstractQueryTestCase; + +import java.io.IOException; +import java.util.Map; + +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.notNullValue; + +/** + * Comprehensive test suite for CombinedFieldsQueryBuilder functionality. + * + * Test Structure: + * 1. Test Infrastructure Setup + * 2. Query Construction Tests + * 3. Configuration Tests (operators, zero terms, etc.) + * 4. Serialization Tests + * 5. Functional Behavior Tests + * 6. Error Condition Tests + * 7. Edge Case Tests + */ +public class CombinedFieldsQueryBuilderTests extends AbstractQueryTestCase { + + // ======================== + // TEST DATA CONSTANTS + // ======================== + + private static final String[] TEST_QUERY_TEXTS = { + "machine learning algorithms", + "distributed systems architecture", + "natural language processing techniques", + "cloud computing infrastructure", + "data science methodologies", + "artificial intelligence applications" }; + + // ======================== + // TEST INFRASTRUCTURE SETUP + // ======================== + + @Override + protected void initializeAdditionalMappings(MapperService mapperService) throws IOException { + mapperService.merge( + "_doc", + new CompressedXContent( + PutMappingRequest.simpleMapping( + "content", + "type=text", + "description", + "type=text", + "summary", + "type=text", + "metadata", + "type=keyword", + "tags", + "type=keyword", + "boosted_field", + "type=text" + ).toString() + ), + MapperService.MergeReason.MAPPING_UPDATE + ); + } + + @Override + protected CombinedFieldsQueryBuilder doCreateTestQueryBuilder() { + String queryText = randomFrom(TEST_QUERY_TEXTS); + String primaryField = randomFrom(TEXT_FIELD_NAME, TEXT_ALIAS_FIELD_NAME, "content", "description", "summary"); + + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, primaryField); + + // Add additional fields with random boosts + int additionalFields = randomIntBetween(0, 2); + for (int i = 0; i < additionalFields; i++) { + String additionalField = randomFrom(TEXT_FIELD_NAME, TEXT_ALIAS_FIELD_NAME, "content", "description", "summary"); + float boost = 1.0f + randomFloat() * 3.0f; + String fieldWithBoost = additionalField + "^" + boost; + queryBuilder = new CombinedFieldsQueryBuilder(queryText, primaryField, fieldWithBoost); + } + + // Random configuration + if (randomBoolean()) { + queryBuilder.operator(randomFrom(Operator.values())); + } + + return queryBuilder; + } + + @Override + protected void doAssertLuceneQuery(CombinedFieldsQueryBuilder queryBuilder, Query query, QueryShardContext context) { + assertThat("Query should not be null", query, notNullValue()); + assertThat( + "Query should be of expected type", + query, + anyOf( + instanceOf(BooleanQuery.class), + instanceOf(TermQuery.class), + instanceOf(MatchAllDocsQuery.class), + instanceOf(MatchNoDocsQuery.class), + instanceOf(CombinedFieldQuery.class) + ) + ); + } + + @Override + protected boolean supportsBoost() { + return true; + } + + @Override + protected boolean supportsQueryName() { + return true; + } + + @Override + protected boolean builderGeneratesCacheableQueries() { + return true; + } + + // ======================== + // QUERY CONSTRUCTION TESTS + // ======================== + + /** + * Tests basic query construction with single field. + */ + public void testBasicQueryConstruction() { + String queryText = "test query"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + assertEquals("Query text should match", queryText, queryBuilder.queryValue()); + assertEquals("Should have one field", 1, queryBuilder.fieldToWeight().size()); + assertTrue("Should contain the specified field", queryBuilder.fieldToWeight().containsKey(TEXT_FIELD_NAME)); + } + + /** + * Tests query construction with multiple fields and boosts. + */ + public void testMultipleFieldsWithBoosts() { + String queryText = "machine learning algorithms"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder( + queryText, + "title", + "content^2.0", + "summary^0.5", + "tags^1.5" + ); + + Map fields = queryBuilder.fieldToWeight(); + + assertEquals("Should have 4 fields", 4, fields.size()); + assertEquals("Title should have default boost", 1.0f, fields.get("title"), 1e-6); + assertEquals("Content should have boost 2.0", 2.0f, fields.get("content"), 1e-6); + assertEquals("Summary should have boost 0.5", 0.5f, fields.get("summary"), 1e-6); + assertEquals("Tags should have boost 1.5", 1.5f, fields.get("tags"), 1e-6); + } + + /** + * Tests comprehensive query construction with all features. + */ + public void testComprehensiveQueryConstruction() throws IOException { + String queryText = "comprehensive test query with all features"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME, TEXT_ALIAS_FIELD_NAME + "^1.5") + .operator(Operator.AND) + .boost(1.8f) + .queryName("comprehensive_test"); + + assertEquals("Query text should match", queryText, queryBuilder.queryValue()); + assertEquals("Field count should be 2", 2, queryBuilder.fieldToWeight().size()); + assertEquals("Operator should be AND", Operator.AND, queryBuilder.operator()); + assertEquals("Boost should be 1.8", 1.8f, queryBuilder.boost(), 1e-6); + assertEquals("Query name should match", "comprehensive_test", queryBuilder.queryName()); + + Query query = queryBuilder.toQuery(createShardContext()); + assertThat("Generated query should not be null", query, notNullValue()); + } + + // ======================== + // CONFIGURATION TESTS + // ======================== + + /** + * Tests operator configuration and behavior. + */ + public void testOperatorConfiguration() { + String queryText = "natural language processing"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + queryBuilder.operator(Operator.OR); + assertEquals("Operator should be OR", Operator.OR, queryBuilder.operator()); + + queryBuilder.operator(Operator.AND); + assertEquals("Operator should be AND", Operator.AND, queryBuilder.operator()); + } + + /** + * Tests OR operator query generation behavior. + */ + public void testOrOperatorQueryGeneration() throws IOException { + String queryText = "distributed systems architecture"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME).operator(Operator.OR); + + Query query = queryBuilder.toQuery(createShardContext()); + + assertThat("Query should be a BooleanQuery for OR operator", query, instanceOf(BooleanQuery.class)); + + BooleanQuery booleanQuery = (BooleanQuery) query; + booleanQuery.clauses().forEach(clause -> { + assertEquals("All clauses should be SHOULD for OR operator", BooleanClause.Occur.SHOULD, clause.occur()); + }); + + assertThat("Should have clauses for multi-term query", booleanQuery.clauses().size(), notNullValue()); + } + + /** + * Tests boost configuration and validation. + */ + public void testBoostConfiguration() { + String queryText = "boosted query test"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + float positiveBoost = 2.5f; + queryBuilder.boost(positiveBoost); + assertEquals("Boost should be set correctly", positiveBoost, queryBuilder.boost(), 1e-6); + + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> queryBuilder.boost(-1.0f)); + assertThat("Exception message should mention negative boost", exception.getMessage(), containsString("negative [boost]")); + } + + /** + * Tests query name configuration. + */ + public void testQueryNameConfiguration() { + String queryText = "named query test"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + String queryName = "test_named_query"; + queryBuilder.queryName(queryName); + assertEquals("Query name should be set correctly", queryName, queryBuilder.queryName()); + } + + // ======================== + // SERIALIZATION TESTS + // ======================== + + /** + * Tests JSON serialization with comprehensive field configuration. + */ + public void testJsonSerializationWithMultipleFields() throws IOException { + String json = "{\n" + + " \"combined_fields\" : {\n" + + " \"query\" : \"distributed systems architecture\",\n" + + " \"fields\" : [ \"title^2.0\", \"content^1.5\", \"tags^0.8\", \"metadata^1.2\" ],\n" + + " \"operator\" : \"AND\",\n" + + " \"boost\" : 1.5,\n" + + " \"_name\" : \"test_combined_fields_query\"\n" + + " }\n" + + "}"; + + Map expectedFieldToWeight = Map.of("title", 2.0f, "content", 1.5f, "tags", 0.8f, "metadata", 1.2f); + CombinedFieldsQueryBuilder parsed = (CombinedFieldsQueryBuilder) parseQuery(json); + + assertEquals("Query value should match", "distributed systems architecture", parsed.queryValue()); + assertEquals("Field count should match", 4, parsed.fieldToWeight().size()); + for (Map.Entry entry : parsed.fieldToWeight().entrySet()) { + Float weight = expectedFieldToWeight.get(entry.getKey()); + assertEquals("Field weight should match for " + entry.getKey(), weight, entry.getValue(), 1e-6); + } + assertEquals("Operator should match", Operator.AND, parsed.operator()); + assertEquals("Boost should match", 1.5f, parsed.boost(), 1e-6); + assertEquals("Query name should match", "test_combined_fields_query", parsed.queryName()); + } + + /** + * Tests serialization and deserialization consistency. + */ + public void testSerializationConsistency() throws IOException { + String queryText = "serialization test query"; + CombinedFieldsQueryBuilder original = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME, TEXT_ALIAS_FIELD_NAME + "^1.5") + .operator(Operator.OR) + .boost(2.0f) + .queryName("serialization_test"); + + String json = original.toString(); + CombinedFieldsQueryBuilder deserialized = (CombinedFieldsQueryBuilder) parseQuery(json); + + assertEquals("Query value should be preserved", original.queryValue(), deserialized.queryValue()); + assertEquals("Fields should be preserved", original.fieldToWeight(), deserialized.fieldToWeight()); + assertEquals("Operator should be preserved", original.operator(), deserialized.operator()); + assertEquals("Boost should be preserved", original.boost(), deserialized.boost(), 1e-6); + assertEquals("Query name should be preserved", original.queryName(), deserialized.queryName()); + } + + // ======================== + // FUNCTIONAL BEHAVIOR TESTS + // ======================== + + /** + * Tests functional operator behavior with real Lucene index. + */ + public void testOperatorBehaviorFunctional() throws IOException { + Directory dir = new ByteBuffersDirectory(); + Analyzer analyzer = new StandardAnalyzer(); + IndexWriterConfig iwc = new IndexWriterConfig(analyzer); + IndexWriter writer = new IndexWriter(dir, iwc); + + addTestDocument(writer, "content", "machine learning"); + addTestDocument(writer, "content", "machine"); + addTestDocument(writer, "content", "learning"); + writer.close(); + + IndexReader reader = DirectoryReader.open(dir); + IndexSearcher searcher = new IndexSearcher(reader); + + // Test AND operator - should only match "machine learning" + CombinedFieldsQueryBuilder andQuery = new CombinedFieldsQueryBuilder("machine learning", "content").operator(Operator.AND); + Query luceneAndQuery = andQuery.toQuery(createShardContext()); + TopDocs andResults = searcher.search(luceneAndQuery, 10); + assertEquals("AND operator should match only complete phrase", 1, andResults.totalHits.value()); + + // Test OR operator - should match all documents + CombinedFieldsQueryBuilder orQuery = new CombinedFieldsQueryBuilder("machine learning", "content").operator(Operator.OR); + Query luceneOrQuery = orQuery.toQuery(createShardContext()); + TopDocs orResults = searcher.search(luceneOrQuery, 10); + assertEquals("OR operator should match all documents", 3, orResults.totalHits.value()); + + reader.close(); + dir.close(); + } + + // ======================== + // ERROR CONDITION TESTS + // ======================== + + /** + * Tests constructor validation with null/invalid inputs. + */ + public void testConstructorValidation() { + IllegalArgumentException nullValueException = expectThrows( + IllegalArgumentException.class, + () -> new CombinedFieldsQueryBuilder(null, "content") + ); + assertThat("Exception should mention query value", nullValueException.getMessage(), containsString("requires query value")); + + IllegalArgumentException nullFieldsException = expectThrows( + IllegalArgumentException.class, + () -> new CombinedFieldsQueryBuilder("test", (String[]) null) + ); + assertThat("Exception should mention field list", nullFieldsException.getMessage(), containsString("requires field list")); + } + + /** + * Tests rejection of non-text field types. + */ + public void testNonTextFieldRejection() throws IOException { + String queryText = "test query"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, INT_FIELD_NAME); + + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> queryBuilder.toQuery(createShardContext())); + assertThat( + "Exception should mention field type support", + exception.getMessage(), + containsString("does not support [combined_fields] queries") + ); + } + + /** + * Tests rejection of mixed field types. + */ + public void testMixedFieldTypesRejection() throws IOException { + String queryText = "test query"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME, INT_FIELD_NAME); + + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> queryBuilder.toQuery(createShardContext())); + assertThat( + "Exception should mention field type support", + exception.getMessage(), + containsString("does not support [combined_fields] queries") + ); + } + + /** + * Tests field boost parsing validation. + */ + public void testFieldBoostValidation() { + String queryText = "test query"; + + // Valid boost values + CombinedFieldsQueryBuilder validQuery = new CombinedFieldsQueryBuilder(queryText, "field^1.5", "field2^2.0"); + Map fields = validQuery.fieldToWeight(); + assertEquals("Should parse boost correctly", 1.5f, fields.get("field"), 1e-6); + assertEquals("Should parse boost correctly", 2.0f, fields.get("field2"), 1e-6); + + // Invalid boost format + NumberFormatException exception = expectThrows( + NumberFormatException.class, + () -> new CombinedFieldsQueryBuilder(queryText, "field^invalid") + ); + assertThat("Exception should mention invalid input", exception.getMessage(), containsString("invalid")); + } + + // ======================== + // EDGE CASE TESTS + // ======================== + + /** + * Tests handling of unmapped fields. + */ + public void testUnmappedFieldsHandling() throws IOException { + String queryText = "test query with unmapped fields"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, "unmapped_field_1", "unmapped_field_2"); + + Query query = queryBuilder.toQuery(createShardContext()); + assertThat("Query should be MatchNoDocsQuery for unmapped fields", query, instanceOf(MatchNoDocsQuery.class)); + } + + /** + * Tests handling of wildcard field patterns. + */ + public void testWildcardFieldPatterns() throws IOException { + String queryText = "test wildcard field patterns"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, "wildcard_field_*"); + + Query query = queryBuilder.toQuery(createShardContext()); + assertThat("Query should be generated for wildcard fields", query, notNullValue()); + } + + /** + * Tests handling of empty query text. + */ + public void testEmptyQueryTextHandling() throws IOException { + String queryText = ""; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + // Should handle empty query gracefully + Query query = queryBuilder.toQuery(createShardContext()); + + // Verify no exceptions are thrown + assertThat("Query should be generated for empty text", query, notNullValue()); + } + + /** + * Tests handling of whitespace-only query text. + */ + public void testWhitespaceOnlyQueryText() throws IOException { + String queryText = " "; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + // Should handle whitespace-only query gracefully + Query query = queryBuilder.toQuery(createShardContext()); + // Verify no exceptions are thrown + } + + /** + * Tests phrase query handling (should not support phrases). + */ + public void testPhraseQueryHandling() throws IOException { + String queryText = "\"phrase query\""; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + // Should work but phrase aspect may be ignored or handled appropriately + try { + Query query = queryBuilder.toQuery(createShardContext()); + assertThat("Query should be generated even with phrase-like text", query, notNullValue()); + } catch (IllegalArgumentException e) { + // If phrases are explicitly not supported, this is also acceptable + assertThat("Exception should mention phrase support", e.getMessage(), containsString("phrases")); + } + } + + // ======================== + // HELPER METHODS + // ======================== + + /** + * Helper method to add a document to the test index. + */ + private void addTestDocument(IndexWriter writer, String field, String value) throws IOException { + Document doc = new Document(); + doc.add(new TextField(field, value, Field.Store.NO)); + writer.addDocument(doc); + } +} diff --git a/server/src/test/java/org/opensearch/search/SearchModuleTests.java b/server/src/test/java/org/opensearch/search/SearchModuleTests.java index 658e7bd2297c4..f1a765ed3463d 100644 --- a/server/src/test/java/org/opensearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/opensearch/search/SearchModuleTests.java @@ -564,6 +564,7 @@ public Optional create(IndexSettings indexSettin "bool", "boosting", "constant_score", + "combined_fields", "dis_max", "exists", "function_score", From 5841e99a42e0a966f1aed769e50bfa45daba7b4a Mon Sep 17 00:00:00 2001 From: Harrison Engel Date: Tue, 15 Jul 2025 16:36:36 +0000 Subject: [PATCH 2/4] Modify the Changelog to include this feature. Signed-off-by: Harrison Engel --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16f8bb2bf9367..3577a1535c0b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add support for non-timing info in profiler ([#18460](https://github.com/opensearch-project/OpenSearch/issues/18460)) - Extend Approximation Framework to other numeric types ([#18530](https://github.com/opensearch-project/OpenSearch/issues/18530)) - Add Semantic Version field type mapper and extensive unit tests([#18454](https://github.com/opensearch-project/OpenSearch/pull/18454)) +- Add support for Combined Fields query ([#18724](https://github.com/opensearch-project/OpenSearch/pull/18724)) ### Changed - Update Subject interface to use CheckedRunnable ([#18570](https://github.com/opensearch-project/OpenSearch/issues/18570)) From 3123164d4c941c55b6343b7da3aca854fa81b346 Mon Sep 17 00:00:00 2001 From: Harrison Engel Date: Wed, 16 Jul 2025 19:06:40 +0000 Subject: [PATCH 3/4] Add minimum_should_match to the combined_fields feature. Signed-off-by: Harrison Engel --- .../query/CombinedFieldsQueryBuilder.java | 41 +++- .../opensearch/index/query/QueryBuilders.java | 7 +- .../CombinedFieldsQueryBuilderTests.java | 201 ++++++++++++++++++ 3 files changed, 241 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java index a6046da10aba0..dbe007d493c4a 100644 --- a/server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/CombinedFieldsQueryBuilder.java @@ -22,6 +22,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.ConstructingObjectParser; +import org.opensearch.core.xcontent.ObjectParser; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.mapper.MappedFieldType; @@ -51,10 +52,12 @@ public class CombinedFieldsQueryBuilder extends AbstractQueryBuilder fieldToWeight; private Operator operator = Operator.OR; + private String minimumShouldMatch; /** * Constructor for CombinedFieldsQueryBuilder. @@ -81,6 +84,7 @@ public CombinedFieldsQueryBuilder(Object value, String... fields) { * - numFields of fieldToWeight map (var int) * - repeated field name and weight pairs (string, float) * - operator (enum) + * - minimumShouldMatch (string) * @param in * @throws IOException */ @@ -99,6 +103,8 @@ public CombinedFieldsQueryBuilder(StreamInput in) throws IOException { } this.operator = Operator.readFromStream(in); + + this.minimumShouldMatch = in.readOptionalString(); } /* @@ -110,6 +116,7 @@ public CombinedFieldsQueryBuilder(StreamInput in) throws IOException { * - numFields of fieldToWeight map (var int) * - repeated field name and weight pairs (string, float) * - operator (enum) + * - minimumShouldMatch (string) */ @Override protected void doWriteTo(StreamOutput out) throws IOException { @@ -122,6 +129,8 @@ protected void doWriteTo(StreamOutput out) throws IOException { } this.operator.writeTo(out); + + out.writeOptionalString(minimumShouldMatch); } // ======================== @@ -160,6 +169,15 @@ public CombinedFieldsQueryBuilder operator(Operator operator) { return this; } + public CombinedFieldsQueryBuilder minimumShouldMatch(String minimumShouldMatch) { + this.minimumShouldMatch = minimumShouldMatch; + return this; + } + + public String minimumShouldMatch() { + return this.minimumShouldMatch; + } + // ======================== // XCONTENT SERIALIZATION // ======================== @@ -174,6 +192,9 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio } builder.endArray(); builder.field(OPERATOR_FIELD.getPreferredName(), operator.toString()); + if (minimumShouldMatch != null) { + builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatch); + } printBoostAndQueryName(builder); builder.endObject(); } @@ -199,6 +220,12 @@ public void doXContent(XContentBuilder builder, Params params) throws IOExceptio XCONTENT_PARSER.declareFloat(CombinedFieldsQueryBuilder::boost, BOOST_FIELD); XCONTENT_PARSER.declareString(CombinedFieldsQueryBuilder::queryName, NAME_FIELD); + XCONTENT_PARSER.declareField( + CombinedFieldsQueryBuilder::minimumShouldMatch, + (p, c) -> p.textOrNull(), + MINIMUM_SHOULD_MATCH_FIELD, + ObjectParser.ValueType.VALUE + ); } public static CombinedFieldsQueryBuilder fromXContent(XContentParser parser) throws IOException { @@ -301,10 +328,10 @@ private Query buildQuery(Map mappedFieldToWeight, Analyzer shared assert mappedFieldToWeight.isEmpty() == false; String placeholderFieldName = mappedFieldToWeight.keySet().iterator().next(); - Builder builder = new Builder(mappedFieldToWeight, sharedAnalyzer, this.operator); + Builder builder = new Builder(mappedFieldToWeight, sharedAnalyzer, this.operator, this.minimumShouldMatch); Query query = builder.createBooleanQuery(placeholderFieldName, this.queryValue.toString(), this.operator.toBooleanClauseOccur()); - return query == null ? createZeroTermsQuery() : query; + return query == null ? createZeroTermsQuery() : Queries.maybeApplyMinimumShouldMatch(query, this.minimumShouldMatch); } private Query createZeroTermsQuery() { @@ -322,11 +349,13 @@ private Query createZeroTermsQuery() { private static class Builder extends QueryBuilder { private final Map mappedFieldToWeight; private final Operator operator; + private final String minimumShouldMatch; - Builder(Map mappedFieldToWeight, Analyzer analyzer, Operator operator) { + Builder(Map mappedFieldToWeight, Analyzer analyzer, Operator operator, String minimumShouldMatch) { super(analyzer); this.mappedFieldToWeight = mappedFieldToWeight; this.operator = operator; + this.minimumShouldMatch = minimumShouldMatch; } @Override @@ -354,6 +383,7 @@ protected Query newSynonymQuery(String fieldPlaceholder, TermAndBoost[] terms) { builder.add(combinedFieldBuilder.build(), occur); } + return builder.build(); } @@ -370,14 +400,15 @@ protected Query newTermQuery(Term term, float boost) { @Override protected int doHashCode() { - return Objects.hash(queryValue, fieldToWeight, operator); + return Objects.hash(queryValue, fieldToWeight, operator, minimumShouldMatch); } @Override protected boolean doEquals(CombinedFieldsQueryBuilder other) { return Objects.equals(queryValue, other.queryValue) && Objects.equals(fieldToWeight, other.fieldToWeight) - && Objects.equals(operator, other.operator); + && Objects.equals(operator, other.operator) + && Objects.equals(minimumShouldMatch, other.minimumShouldMatch); } } diff --git a/server/src/main/java/org/opensearch/index/query/QueryBuilders.java b/server/src/main/java/org/opensearch/index/query/QueryBuilders.java index db1f48cafcf32..7eaeed14e4bca 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryBuilders.java +++ b/server/src/main/java/org/opensearch/index/query/QueryBuilders.java @@ -39,6 +39,7 @@ import org.opensearch.common.geo.builders.ShapeBuilder; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.geometry.Geometry; +import org.opensearch.index.query.CombinedFieldsQueryBuilder; import org.opensearch.index.query.DistanceFeatureQueryBuilder.Origin; import org.opensearch.index.query.MoreLikeThisQueryBuilder.Item; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; @@ -116,10 +117,10 @@ public static MatchBoolPrefixQueryBuilder matchBoolPrefixQuery(String name, Obje * * @param value The query value (to be analyzed). * @param fields The target field names. - * @return a {@link org.opensearch.index.query.CombinedFieldsQueryBuilder} instance. + * @return a {@link CombinedFieldsQueryBuilder} instance. */ - public static org.opensearch.index.query.CombinedFieldsQueryBuilder combinedFieldsQuery(Object value, String... fields) { - return new org.opensearch.index.query.CombinedFieldsQueryBuilder(value, fields); + public static CombinedFieldsQueryBuilder combinedFieldsQuery(Object value, String... fields) { + return new CombinedFieldsQueryBuilder(value, fields); } /** diff --git a/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java index 19bda4dafb877..a43ecccaf549d 100644 --- a/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java @@ -116,6 +116,11 @@ protected CombinedFieldsQueryBuilder doCreateTestQueryBuilder() { queryBuilder.operator(randomFrom(Operator.values())); } + // Add minimum_should_match configuration + if (randomBoolean()) { + queryBuilder.minimumShouldMatch(randomMinimumShouldMatch()); + } + return queryBuilder; } @@ -195,12 +200,14 @@ public void testComprehensiveQueryConstruction() throws IOException { String queryText = "comprehensive test query with all features"; CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME, TEXT_ALIAS_FIELD_NAME + "^1.5") .operator(Operator.AND) + .minimumShouldMatch("75%") .boost(1.8f) .queryName("comprehensive_test"); assertEquals("Query text should match", queryText, queryBuilder.queryValue()); assertEquals("Field count should be 2", 2, queryBuilder.fieldToWeight().size()); assertEquals("Operator should be AND", Operator.AND, queryBuilder.operator()); + assertEquals("Minimum should match should be 75%", "75%", queryBuilder.minimumShouldMatch()); assertEquals("Boost should be 1.8", 1.8f, queryBuilder.boost(), 1e-6); assertEquals("Query name should match", "comprehensive_test", queryBuilder.queryName()); @@ -272,6 +279,196 @@ public void testQueryNameConfiguration() { assertEquals("Query name should be set correctly", queryName, queryBuilder.queryName()); } + // ======================== + // MINIMUM_SHOULD_MATCH TESTS + // ======================== + + /** + * Tests minimum_should_match configuration and getter/setter methods. + */ + public void testMinimumShouldMatchConfiguration() { + String queryText = "minimum should match test"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + // Test setting minimum_should_match + String minimumShouldMatch = "75%"; + queryBuilder.minimumShouldMatch(minimumShouldMatch); + assertEquals("Minimum should match should be set correctly", minimumShouldMatch, queryBuilder.minimumShouldMatch()); + + // Test setting numeric value + String numericValue = "2"; + queryBuilder.minimumShouldMatch(numericValue); + assertEquals("Numeric minimum should match should be set correctly", numericValue, queryBuilder.minimumShouldMatch()); + + // Test setting null value + queryBuilder.minimumShouldMatch(null); + assertNull("Null minimum should match should be set correctly", queryBuilder.minimumShouldMatch()); + + // Test method chaining + CombinedFieldsQueryBuilder chained = queryBuilder.minimumShouldMatch("50%"); + assertSame("Method chaining should return same instance", queryBuilder, chained); + assertEquals("Chained minimum should match should be set", "50%", queryBuilder.minimumShouldMatch()); + } + + /** + * Tests minimum_should_match JSON serialization and parsing. + */ + public void testMinimumShouldMatchJsonSerialization() throws IOException { + // Test with percentage value + String jsonWithPercentage = "{\n" + + " \"combined_fields\" : {\n" + + " \"query\" : \"test query\",\n" + + " \"fields\" : [ \"content\" ],\n" + + " \"minimum_should_match\" : \"75%\"\n" + + " }\n" + + "}"; + + CombinedFieldsQueryBuilder parsedWithPercentage = (CombinedFieldsQueryBuilder) parseQuery(jsonWithPercentage); + assertEquals("Percentage minimum should match should be parsed correctly", "75%", parsedWithPercentage.minimumShouldMatch()); + + // Test with numeric value + String jsonWithNumber = "{\n" + + " \"combined_fields\" : {\n" + + " \"query\" : \"test query\",\n" + + " \"fields\" : [ \"content\" ],\n" + + " \"minimum_should_match\" : \"2\"\n" + + " }\n" + + "}"; + + CombinedFieldsQueryBuilder parsedWithNumber = (CombinedFieldsQueryBuilder) parseQuery(jsonWithNumber); + assertEquals("Numeric minimum should match should be parsed correctly", "2", parsedWithNumber.minimumShouldMatch()); + + // Test with complex value + String jsonWithComplex = "{\n" + + " \"combined_fields\" : {\n" + + " \"query\" : \"test query\",\n" + + " \"fields\" : [ \"content\" ],\n" + + " \"minimum_should_match\" : \"2<75%\"\n" + + " }\n" + + "}"; + + CombinedFieldsQueryBuilder parsedWithComplex = (CombinedFieldsQueryBuilder) parseQuery(jsonWithComplex); + assertEquals("Complex minimum should match should be parsed correctly", "2<75%", parsedWithComplex.minimumShouldMatch()); + + // Test serialization round-trip + CombinedFieldsQueryBuilder original = new CombinedFieldsQueryBuilder("test query", "content") + .minimumShouldMatch("50%") + .operator(Operator.OR); + + String serialized = original.toString(); + CombinedFieldsQueryBuilder deserialized = (CombinedFieldsQueryBuilder) parseQuery(serialized); + + assertEquals("Minimum should match should be preserved in serialization", + original.minimumShouldMatch(), deserialized.minimumShouldMatch()); + } + + /** + * Tests minimum_should_match functional behavior with real queries. + */ + public void testMinimumShouldMatchFunctionalBehavior() throws IOException { + Directory dir = new ByteBuffersDirectory(); + Analyzer analyzer = new StandardAnalyzer(); + IndexWriterConfig iwc = new IndexWriterConfig(analyzer); + IndexWriter writer = new IndexWriter(dir, iwc); + + // Add test documents with multiple terms + addTestDocument(writer, "content", "machine learning algorithms"); + addTestDocument(writer, "content", "machine learning"); + addTestDocument(writer, "content", "machine"); + addTestDocument(writer, "content", "learning algorithms"); + addTestDocument(writer, "content", "algorithms"); + writer.close(); + + IndexReader reader = DirectoryReader.open(dir); + IndexSearcher searcher = new IndexSearcher(reader); + + // Test with minimum_should_match = "2" (at least 2 terms) + CombinedFieldsQueryBuilder queryWithMin2 = new CombinedFieldsQueryBuilder("machine learning algorithms", "content") + .operator(Operator.OR) + .minimumShouldMatch("2"); + + Query luceneQueryMin2 = queryWithMin2.toQuery(createShardContext()); + TopDocs resultsMin2 = searcher.search(luceneQueryMin2, 10); + + // Should match documents with at least 2 of the 3 terms + // The documents should be: "machine learning algorithms" (3 terms), "machine learning" (2 terms), "learning algorithms" (2 terms) + assertTrue("Should match documents with at least 2 terms", resultsMin2.totalHits.value() == 3); + + // Test with minimum_should_match = "75%" (75% of terms, rounded down, so 2 terms) + CombinedFieldsQueryBuilder queryWithMin75 = new CombinedFieldsQueryBuilder("machine learning algorithms", "content") + .operator(Operator.OR) + .minimumShouldMatch("75%"); + + Query luceneQueryMin75 = queryWithMin75.toQuery(createShardContext()); + TopDocs resultsMin75 = searcher.search(luceneQueryMin75, 10); + + // 75% of 3 terms = 2.25, rounded up to 3 terms required + // This should match fewer documents than the "2" minimum_should_match + assertTrue("Should match documents with 75% of terms", resultsMin75.totalHits.value() == 3); + + reader.close(); + dir.close(); + } + + /** + * Tests minimum_should_match edge cases and validation. + */ + public void testMinimumShouldMatchEdgeCases() { + String queryText = "test query"; + CombinedFieldsQueryBuilder queryBuilder = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME); + + // Test with empty string + queryBuilder.minimumShouldMatch(""); + assertEquals("Empty string should be preserved", "", queryBuilder.minimumShouldMatch()); + + // Test with whitespace-only string + queryBuilder.minimumShouldMatch(" "); + assertEquals("Whitespace-only string should be preserved", " ", queryBuilder.minimumShouldMatch()); + + // Test with very large number + queryBuilder.minimumShouldMatch("999999"); + assertEquals("Large number should be preserved", "999999", queryBuilder.minimumShouldMatch()); + + // Test with negative percentage + queryBuilder.minimumShouldMatch("-25%"); + assertEquals("Negative percentage should be preserved", "-25%", queryBuilder.minimumShouldMatch()); + } + + /** + * Tests minimum_should_match in equals and hashCode methods. + */ + public void testMinimumShouldMatchEqualsAndHashCode() { + String queryText = "test query"; + String field = "content"; + + CombinedFieldsQueryBuilder query1 = new CombinedFieldsQueryBuilder(queryText, field) + .minimumShouldMatch("75%"); + + CombinedFieldsQueryBuilder query2 = new CombinedFieldsQueryBuilder(queryText, field) + .minimumShouldMatch("75%"); + + CombinedFieldsQueryBuilder query3 = new CombinedFieldsQueryBuilder(queryText, field) + .minimumShouldMatch("50%"); + + // Test equals + assertEquals("Queries with same minimum_should_match should be equal", query1, query2); + assertNotEquals("Queries with different minimum_should_match should not be equal", query1, query3); + + // Test hashCode + assertEquals("Queries with same minimum_should_match should have same hashCode", query1.hashCode(), query2.hashCode()); + assertNotEquals("Queries with different minimum_should_match should have different hashCode", query1.hashCode(), query3.hashCode()); + + // Test with null minimum_should_match + CombinedFieldsQueryBuilder query4 = new CombinedFieldsQueryBuilder(queryText, field) + .minimumShouldMatch(null); + + CombinedFieldsQueryBuilder query5 = new CombinedFieldsQueryBuilder(queryText, field) + .minimumShouldMatch(null); + + assertEquals("Queries with null minimum_should_match should be equal", query4, query5); + assertEquals("Queries with null minimum_should_match should have same hashCode", query4.hashCode(), query5.hashCode()); + } + // ======================== // SERIALIZATION TESTS // ======================== @@ -285,6 +482,7 @@ public void testJsonSerializationWithMultipleFields() throws IOException { + " \"query\" : \"distributed systems architecture\",\n" + " \"fields\" : [ \"title^2.0\", \"content^1.5\", \"tags^0.8\", \"metadata^1.2\" ],\n" + " \"operator\" : \"AND\",\n" + + " \"minimum_should_match\" : \"75%\",\n" + " \"boost\" : 1.5,\n" + " \"_name\" : \"test_combined_fields_query\"\n" + " }\n" @@ -300,6 +498,7 @@ public void testJsonSerializationWithMultipleFields() throws IOException { assertEquals("Field weight should match for " + entry.getKey(), weight, entry.getValue(), 1e-6); } assertEquals("Operator should match", Operator.AND, parsed.operator()); + assertEquals("Minimum should match should match", "75%", parsed.minimumShouldMatch()); assertEquals("Boost should match", 1.5f, parsed.boost(), 1e-6); assertEquals("Query name should match", "test_combined_fields_query", parsed.queryName()); } @@ -311,6 +510,7 @@ public void testSerializationConsistency() throws IOException { String queryText = "serialization test query"; CombinedFieldsQueryBuilder original = new CombinedFieldsQueryBuilder(queryText, TEXT_FIELD_NAME, TEXT_ALIAS_FIELD_NAME + "^1.5") .operator(Operator.OR) + .minimumShouldMatch("50%") .boost(2.0f) .queryName("serialization_test"); @@ -320,6 +520,7 @@ public void testSerializationConsistency() throws IOException { assertEquals("Query value should be preserved", original.queryValue(), deserialized.queryValue()); assertEquals("Fields should be preserved", original.fieldToWeight(), deserialized.fieldToWeight()); assertEquals("Operator should be preserved", original.operator(), deserialized.operator()); + assertEquals("Minimum should match should be preserved", original.minimumShouldMatch(), deserialized.minimumShouldMatch()); assertEquals("Boost should be preserved", original.boost(), deserialized.boost(), 1e-6); assertEquals("Query name should be preserved", original.queryName(), deserialized.queryName()); } From 0c9d6521141320a18015cc127022afa796264ed1 Mon Sep 17 00:00:00 2001 From: Harrison Engel Date: Wed, 16 Jul 2025 20:02:25 +0000 Subject: [PATCH 4/4] Make spotless changes to CombinedFieldsQueryBuilder Signed-off-by: Harrison Engel --- .../opensearch/index/query/QueryBuilders.java | 1 - .../CombinedFieldsQueryBuilderTests.java | 53 +++++++++---------- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/query/QueryBuilders.java b/server/src/main/java/org/opensearch/index/query/QueryBuilders.java index 7eaeed14e4bca..51c0a36d6d5a6 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryBuilders.java +++ b/server/src/main/java/org/opensearch/index/query/QueryBuilders.java @@ -39,7 +39,6 @@ import org.opensearch.common.geo.builders.ShapeBuilder; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.geometry.Geometry; -import org.opensearch.index.query.CombinedFieldsQueryBuilder; import org.opensearch.index.query.DistanceFeatureQueryBuilder.Origin; import org.opensearch.index.query.MoreLikeThisQueryBuilder.Item; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; diff --git a/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java index a43ecccaf549d..409185c51f559 100644 --- a/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/CombinedFieldsQueryBuilderTests.java @@ -351,15 +351,17 @@ public void testMinimumShouldMatchJsonSerialization() throws IOException { assertEquals("Complex minimum should match should be parsed correctly", "2<75%", parsedWithComplex.minimumShouldMatch()); // Test serialization round-trip - CombinedFieldsQueryBuilder original = new CombinedFieldsQueryBuilder("test query", "content") - .minimumShouldMatch("50%") + CombinedFieldsQueryBuilder original = new CombinedFieldsQueryBuilder("test query", "content").minimumShouldMatch("50%") .operator(Operator.OR); - + String serialized = original.toString(); CombinedFieldsQueryBuilder deserialized = (CombinedFieldsQueryBuilder) parseQuery(serialized); - - assertEquals("Minimum should match should be preserved in serialization", - original.minimumShouldMatch(), deserialized.minimumShouldMatch()); + + assertEquals( + "Minimum should match should be preserved in serialization", + original.minimumShouldMatch(), + deserialized.minimumShouldMatch() + ); } /** @@ -383,10 +385,10 @@ public void testMinimumShouldMatchFunctionalBehavior() throws IOException { IndexSearcher searcher = new IndexSearcher(reader); // Test with minimum_should_match = "2" (at least 2 terms) - CombinedFieldsQueryBuilder queryWithMin2 = new CombinedFieldsQueryBuilder("machine learning algorithms", "content") - .operator(Operator.OR) - .minimumShouldMatch("2"); - + CombinedFieldsQueryBuilder queryWithMin2 = new CombinedFieldsQueryBuilder("machine learning algorithms", "content").operator( + Operator.OR + ).minimumShouldMatch("2"); + Query luceneQueryMin2 = queryWithMin2.toQuery(createShardContext()); TopDocs resultsMin2 = searcher.search(luceneQueryMin2, 10); @@ -395,13 +397,13 @@ public void testMinimumShouldMatchFunctionalBehavior() throws IOException { assertTrue("Should match documents with at least 2 terms", resultsMin2.totalHits.value() == 3); // Test with minimum_should_match = "75%" (75% of terms, rounded down, so 2 terms) - CombinedFieldsQueryBuilder queryWithMin75 = new CombinedFieldsQueryBuilder("machine learning algorithms", "content") - .operator(Operator.OR) - .minimumShouldMatch("75%"); - + CombinedFieldsQueryBuilder queryWithMin75 = new CombinedFieldsQueryBuilder("machine learning algorithms", "content").operator( + Operator.OR + ).minimumShouldMatch("75%"); + Query luceneQueryMin75 = queryWithMin75.toQuery(createShardContext()); TopDocs resultsMin75 = searcher.search(luceneQueryMin75, 10); - + // 75% of 3 terms = 2.25, rounded up to 3 terms required // This should match fewer documents than the "2" minimum_should_match assertTrue("Should match documents with 75% of terms", resultsMin75.totalHits.value() == 3); @@ -441,14 +443,11 @@ public void testMinimumShouldMatchEqualsAndHashCode() { String queryText = "test query"; String field = "content"; - CombinedFieldsQueryBuilder query1 = new CombinedFieldsQueryBuilder(queryText, field) - .minimumShouldMatch("75%"); - - CombinedFieldsQueryBuilder query2 = new CombinedFieldsQueryBuilder(queryText, field) - .minimumShouldMatch("75%"); - - CombinedFieldsQueryBuilder query3 = new CombinedFieldsQueryBuilder(queryText, field) - .minimumShouldMatch("50%"); + CombinedFieldsQueryBuilder query1 = new CombinedFieldsQueryBuilder(queryText, field).minimumShouldMatch("75%"); + + CombinedFieldsQueryBuilder query2 = new CombinedFieldsQueryBuilder(queryText, field).minimumShouldMatch("75%"); + + CombinedFieldsQueryBuilder query3 = new CombinedFieldsQueryBuilder(queryText, field).minimumShouldMatch("50%"); // Test equals assertEquals("Queries with same minimum_should_match should be equal", query1, query2); @@ -459,11 +458,9 @@ public void testMinimumShouldMatchEqualsAndHashCode() { assertNotEquals("Queries with different minimum_should_match should have different hashCode", query1.hashCode(), query3.hashCode()); // Test with null minimum_should_match - CombinedFieldsQueryBuilder query4 = new CombinedFieldsQueryBuilder(queryText, field) - .minimumShouldMatch(null); - - CombinedFieldsQueryBuilder query5 = new CombinedFieldsQueryBuilder(queryText, field) - .minimumShouldMatch(null); + CombinedFieldsQueryBuilder query4 = new CombinedFieldsQueryBuilder(queryText, field).minimumShouldMatch(null); + + CombinedFieldsQueryBuilder query5 = new CombinedFieldsQueryBuilder(queryText, field).minimumShouldMatch(null); assertEquals("Queries with null minimum_should_match should be equal", query4, query5); assertEquals("Queries with null minimum_should_match should have same hashCode", query4.hashCode(), query5.hashCode());