diff --git a/CHANGELOG.md b/CHANGELOG.md index 24b8d7a255f88..a733a3ad26297 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add a dynamic setting to change skip_cache_factor and min_frequency for querycache ([#18351](https://github.com/opensearch-project/OpenSearch/issues/18351)) - Add overload constructor for Translog to accept Channel Factory as a parameter ([#18918](https://github.com/opensearch-project/OpenSearch/pull/18918)) - Add subdirectory-aware store module with recovery support ([#19132](https://github.com/opensearch-project/OpenSearch/pull/19132)) +- Field collapsing supports search_after ([#19261](https://github.com/opensearch-project/OpenSearch/pull/19261)) - Add a dynamic cluster setting to control the enablement of the merged segment warmer ([#18929](https://github.com/opensearch-project/OpenSearch/pull/18929)) - Publish transport-grpc-spi exposing QueryBuilderProtoConverter and QueryBuilderProtoConverterRegistry ([#18949](https://github.com/opensearch-project/OpenSearch/pull/18949)) - Support system generated search pipeline. 
([#19128](https://github.com/opensearch-project/OpenSearch/pull/19128)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/110_field_collapsing.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/110_field_collapsing.yml index f49927cbae12d..5efabd82156fa 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/110_field_collapsing.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/110_field_collapsing.yml @@ -245,20 +245,6 @@ setup: body: collapse: { field: numeric_group } ---- -"field collapsing and search_after": - - - do: - catch: /cannot use \`collapse\` in conjunction with \`search_after\`/ - search: - allow_partial_search_results: false - rest_total_hits_as_int: true - index: test - body: - collapse: { field: numeric_group } - search_after: [6] - sort: [{ sort: desc }] - --- "field collapsing and rescore": @@ -610,3 +596,334 @@ setup: - match: { hits.hits.0._source.marker: 'doc1' } - match: { hits.hits.1._id: '2' } - match: { hits.hits.1._source.marker: 'doc2' } + + +--- +"field collapsing with search_after - basic functionality": + - skip: + version: " - 3.2.99" + reason: Introduced in 3.3.0 + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: asc }] + size: 10 + + - match: {hits.total: 6 } + - length: {hits.hits: 3 } + - match: {hits.hits.0.fields.numeric_group: [1] } + - match: {hits.hits.1.fields.numeric_group: [3] } + - match: {hits.hits.2.fields.numeric_group: [25] } + + - do: + catch: /collapse field and sort field must be the same when use `collapse` in conjunction with `search_after`/ + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ sort: desc }] + search_after: [10] + - do: + catch: /collapse field and sort field must be the same when use `collapse` in conjunction with `search_after`/ + search: + rest_total_hits_as_int: true + index: test + 
body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }, { sort: asc }] + search_after: [25, 10] + + # Test asc, first page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: asc }] + search_after: [1] + size: 10 + + - match: {hits.total: 6 } + - length: {hits.hits: 2 } + - match: {hits.hits.0.fields.numeric_group: [3] } + - match: {hits.hits.1.fields.numeric_group: [25] } + + # Test asc, second page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: asc }] + search_after: [3] + size: 2 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [25] } + + # Test asc, no result + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: asc }] + search_after: [999] + size: 10 + + - match: {hits.total: 6 } + - length: {hits.hits: 0 } + + # Test desc, first page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [25] } + - set: { hits.hits.0.sort.0: last_sort_value } + + # Test desc, second page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + search_after: [$last_sort_value] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [3] } + - set: { hits.hits.0.sort.0: last_sort_value } + + # Test desc, third page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + search_after: [$last_sort_value] + size: 1 + + - match: 
{hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [1] } + - set: { hits.hits.0.sort.0: last_sort_value } + + # Test desc, no result + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + search_after: [$last_sort_value] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 0 } + + # test on keyword field + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: tag } + sort: [{ tag: asc }] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.tag: ["A"] } + + # Search after "A" + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: tag } + sort: [{ tag: asc }] + search_after: ["A"] + size: 1 + +--- +"field collapsing with search_after - concurrent segment search enabled": + - skip: + version: " - 3.2.99" + reason: Introduced in 3.3.0 + - do: + indices.put_settings: + index: test + body: + index.search.concurrent_segment_search.mode: 'all' + + # Test asc, first page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: asc }] + search_after: [1] + size: 10 + + - match: {hits.total: 6 } + - length: {hits.hits: 2 } + - match: {hits.hits.0.fields.numeric_group: [3] } + - match: {hits.hits.1.fields.numeric_group: [25] } + + # Test asc, second page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: asc }] + search_after: [3] + size: 2 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [25] } + + # Test asc, no result + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: asc }] + search_after: [999] + size: 10 + + - match: 
{hits.total: 6 } + - length: {hits.hits: 0 } + + # Test desc, first page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [25] } + - set: { hits.hits.0.sort.0: last_sort_value } + + # Test desc, second page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + search_after: [$last_sort_value] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [3] } + - set: { hits.hits.0.sort.0: last_sort_value } + + # Test desc, third page + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + search_after: [$last_sort_value] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.numeric_group: [1] } + - set: { hits.hits.0.sort.0: last_sort_value } + + # Test desc, no result + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: numeric_group } + sort: [{ numeric_group: desc }] + search_after: [$last_sort_value] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 0 } + + # test on keyword field + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: tag } + sort: [{ tag: asc }] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.tag: ["A"] } + + # Search after "A" + - do: + search: + rest_total_hits_as_int: true + index: test + body: + collapse: { field: tag } + sort: [{ tag: asc }] + search_after: ["A"] + size: 1 + + - match: {hits.total: 6 } + - length: {hits.hits: 1 } + - match: {hits.hits.0.fields.tag: ["B"] } + + # Search after "B" + - do: + search: + 
rest_total_hits_as_int: true
+        index: test
+        body:
+          collapse: { field: tag }
+          sort: [{ tag: asc }]
+          search_after: ["B"]
+          size: 1
+
+  - match: {hits.total: 6 }
+  - length: {hits.hits: 0 }
diff --git a/server/src/main/java/org/apache/lucene/search/grouping/CollapsingTopDocsCollector.java b/server/src/main/java/org/apache/lucene/search/grouping/CollapsingTopDocsCollector.java
index 9ca0491bc29f5..aaef5861e38cd 100644
--- a/server/src/main/java/org/apache/lucene/search/grouping/CollapsingTopDocsCollector.java
+++ b/server/src/main/java/org/apache/lucene/search/grouping/CollapsingTopDocsCollector.java
@@ -31,7 +31,11 @@
 
 package org.apache.lucene.search.grouping;
 
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.FieldComparator;
 import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.LeafFieldComparator;
+import org.apache.lucene.search.Pruning;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.ScoreMode;
@@ -61,11 +65,41 @@ public final class CollapsingTopDocsCollector extends FirstPassGroupingCollec
     protected Scorable scorer;
 
     private int totalHitCount;
+    private final FieldDoc after;
+    private FieldComparator afterComparator;
+    private LeafFieldComparator leafComparator;
+    private final int reverseMul;
 
     CollapsingTopDocsCollector(GroupSelector groupSelector, String collapseField, Sort sort, int topN) {
         super(groupSelector, sort, topN);
         this.collapseField = collapseField;
         this.sort = sort;
+        this.after = null;
+        this.reverseMul = 1;
+    }
+
+    CollapsingTopDocsCollector(GroupSelector groupSelector, String collapseField, Sort sort, int topN, FieldDoc after) {
+        super(groupSelector, sort, topN);
+        this.collapseField = collapseField;
+        this.sort = sort;
+        this.after = after;
+
+        if (after != null) {
+            // require a single sort on the collapse field (SortField#getField() may be null, so compare from collapseField)
+            if (sort.getSort().length != 1 || !collapseField.equals(sort.getSort()[0].getField())) {
+                throw new IllegalArgumentException("The after parameter can only be used when the sort is based on the collapse field");
+            }
+            SortField field = sort.getSort()[0];
+            afterComparator = field.getComparator(1, Pruning.NONE);
+
+            @SuppressWarnings("unchecked")
+            FieldComparator comparator = (FieldComparator) afterComparator;
+            comparator.setTopValue(after.fields[0]);
+
+            reverseMul = field.getReverse() ? -1 : 1;
+        } else {
+            reverseMul = 1;
+        }
     }
 
     /**
@@ -76,7 +110,9 @@ public final class CollapsingTopDocsCollector extends FirstPassGroupingCollec
     public CollapseTopFieldDocs getTopDocs() throws IOException {
         Collection> groups = super.getTopGroups(0);
         if (groups == null) {
-            TotalHits totalHits = new TotalHits(0, TotalHits.Relation.EQUAL_TO);
+            // For search_after, use totalHitCount to preserve hit information
+            // For non-search_after, totalHitCount equals 0 when no matches, so behavior unchanged
+            TotalHits totalHits = new TotalHits(totalHitCount, TotalHits.Relation.EQUAL_TO);
             return new CollapseTopFieldDocs(collapseField, totalHits, new ScoreDoc[0], sort.getSort(), new Object[0]);
         }
         FieldDoc[] docs = new FieldDoc[groups.size()];
@@ -123,10 +159,34 @@ public void setScorer(Scorable scorer) throws IOException {
 
     @Override
     public void collect(int doc) throws IOException {
+        if (after != null && !isAfterDoc(doc)) {
+            totalHitCount++;
+            return;
+        }
+
         super.collect(doc);
         totalHitCount++;
     }
 
+    private boolean isAfterDoc(int doc) throws IOException {
+        if (leafComparator == null) return true;
+        // compareTop orders the search_after value (set as the comparator's top value) against this
+        // doc's value; after applying the sort direction, a negative result means the doc sorts
+        // strictly after it. The sort key is the collapse key, so a tie means the doc belongs to the
+        // group that closed the previous page and must be skipped. The doc id is deliberately not
+        // used as a tie-breaker: `doc` is leaf-local while after.doc is not, so they are not comparable.
+        return reverseMul * leafComparator.compareTop(doc) < 0;
+    }
+
+    @Override
+    protected void doSetNextReader(LeafReaderContext readerContext) throws IOException {
+        super.doSetNextReader(readerContext);
+        if (after != null) {
+            leafComparator = afterComparator.getLeafComparator(readerContext);
+        }
+    }
+
     /**
      * Create a collapsing top docs collector on a {@link org.apache.lucene.index.NumericDocValues} field.
* It accepts also {@link org.apache.lucene.index.SortedNumericDocValues} field but @@ -150,6 +210,31 @@ public static CollapsingTopDocsCollector createNumeric( return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Numeric(collapseFieldType), collapseField, sort, topN); } + /** + * Create a collapsing top docs collector on a {@link org.apache.lucene.index.NumericDocValues} field. + * It accepts also {@link org.apache.lucene.index.SortedNumericDocValues} field but + * the collect will fail with an {@link IllegalStateException} if a document contains more than one value for the + * field. + * + * @param collapseField The sort field used to group documents. + * @param collapseFieldType The {@link MappedFieldType} for this sort field. + * @param sort The {@link Sort} used to sort the collapsed hits. + * The collapsing keeps only the top sorted document per collapsed key. + * This must be non-null, ie, if you want to groupSort by relevance + * use Sort.RELEVANCE. + * @param topN How many top groups to keep. + * @param after The last sort value of the previous page. Pass null if this is the first page. + */ + public static CollapsingTopDocsCollector createNumeric( + String collapseField, + MappedFieldType collapseFieldType, + Sort sort, + int topN, + FieldDoc after + ) { + return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Numeric(collapseFieldType), collapseField, sort, topN, after); + } + /** * Create a collapsing top docs collector on a {@link org.apache.lucene.index.SortedDocValues} field. * It accepts also {@link org.apache.lucene.index.SortedSetDocValues} field but @@ -171,4 +256,28 @@ public static CollapsingTopDocsCollector createKeyword( ) { return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Keyword(collapseFieldType), collapseField, sort, topN); } + + /** + * Create a collapsing top docs collector on a {@link org.apache.lucene.index.SortedDocValues} field. 
+ * It accepts also {@link org.apache.lucene.index.SortedSetDocValues} field but + * the collect will fail with an {@link IllegalStateException} if a document contains more than one value for the + * field. + * + * @param collapseField The sort field used to group documents. + * @param collapseFieldType The {@link MappedFieldType} for this sort field. + * @param sort The {@link Sort} used to sort the collapsed hits. The collapsing keeps only the top sorted + * document per collapsed key. + * This must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE. + * @param topN How many top groups to keep. + * @param after The last sort value of the previous page. Pass null if this is the first page. + */ + public static CollapsingTopDocsCollector createKeyword( + String collapseField, + MappedFieldType collapseFieldType, + Sort sort, + int topN, + FieldDoc after + ) { + return new CollapsingTopDocsCollector<>(new CollapsingDocValuesSource.Keyword(collapseFieldType), collapseField, sort, topN, after); + } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index c6fe57188eff1..eeb4978d4c1f8 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -37,6 +37,7 @@ import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.opensearch.OpenSearchException; import org.opensearch.action.ActionRunnable; @@ -1681,7 +1682,13 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc throw new SearchException(shardTarget, "cannot use `collapse` in a scroll context"); } if (context.searchAfter() != null) { - throw new SearchException(shardTarget, "cannot use `collapse` in conjunction with 
`search_after`");
+                SortField[] sort = context.sort().sort.getSort(); // a score/doc SortField has a null field name
+                if (sort.length != 1 || !source.collapse().getField().equals(sort[0].getField())) {
+                    throw new SearchException(
+                        shardTarget,
+                        "collapse field and sort field must be the same when use `collapse` in conjunction with `search_after`"
+                    );
+                }
             }
             if (context.rescore() != null && context.rescore().isEmpty() == false) {
                 throw new SearchException(shardTarget, "cannot use `collapse` in conjunction with `rescore`");
diff --git a/server/src/main/java/org/opensearch/search/collapse/CollapseContext.java b/server/src/main/java/org/opensearch/search/collapse/CollapseContext.java
index f3e8fb7ffea47..d0f3ac2489cdb 100644
--- a/server/src/main/java/org/opensearch/search/collapse/CollapseContext.java
+++ b/server/src/main/java/org/opensearch/search/collapse/CollapseContext.java
@@ -31,6 +31,7 @@
 
 package org.opensearch.search.collapse;
 
+import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.grouping.CollapsingTopDocsCollector;
 import org.opensearch.common.annotation.PublicApi;
@@ -75,6 +76,14 @@ public List getInnerHit() {
         return innerHits;
     }
 
+    /**
+     * Creates a CollapsingTopDocsCollector for field collapsing without search_after support.
+ * + * @param sort The sort order for collapsed groups + * @param topN Maximum number of collapsed groups to collect + * @return CollapsingTopDocsCollector instance based on field type + * @throws IllegalStateException if field type is not keyword or numeric + */ public CollapsingTopDocsCollector createTopDocs(Sort sort, int topN) { if (fieldType != null && fieldType.unwrap() instanceof KeywordFieldMapper.KeywordFieldType) { return CollapsingTopDocsCollector.createKeyword(fieldName, fieldType, sort, topN); @@ -84,4 +93,25 @@ public CollapsingTopDocsCollector createTopDocs(Sort sort, int topN) { throw new IllegalStateException("unknown type for collapse field " + fieldName + ", only keywords and numbers are accepted"); } } + + /** + * Creates a CollapsingTopDocsCollector for field collapsing with search_after support. + * + * @param sort The sort order for collapsed groups (must match collapse field for search_after) + * @param topN Maximum number of collapsed groups to collect + * @param searchAfter The last sort value from previous page for pagination + * @return CollapsingTopDocsCollector instance based on field type + * @throws IllegalStateException if field type is not keyword or numeric + */ + public CollapsingTopDocsCollector createTopDocs(Sort sort, int topN, FieldDoc searchAfter) { + if (fieldType != null && fieldType.unwrap() instanceof KeywordFieldMapper.KeywordFieldType) { + return CollapsingTopDocsCollector.createKeyword(fieldName, fieldType, sort, topN, searchAfter); + } else if (fieldType != null && fieldType.unwrap() instanceof NumberFieldMapper.NumberFieldType) { + return CollapsingTopDocsCollector.createNumeric(fieldName, fieldType, sort, topN, searchAfter); + } else { + throw new IllegalStateException( + "unsupported type for collapse field " + fieldName + ", only keywords and numbers are accepted" + ); + } + } } diff --git a/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java 
b/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java index 5b82b0df68ca6..d56148e6fab00 100644 --- a/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/opensearch/search/query/TopDocsCollectorContext.java @@ -232,6 +232,7 @@ static class CollapsingTopDocsCollectorContext extends TopDocsCollectorContext { private final CollapseContext collapseContext; private final boolean trackMaxScore; private final Sort sort; + private final FieldDoc searchAfter; /** * Ctr @@ -245,6 +246,24 @@ private CollapsingTopDocsCollectorContext( @Nullable SortAndFormats sortAndFormats, int numHits, boolean trackMaxScore + ) { + this(collapseContext, sortAndFormats, numHits, trackMaxScore, null); + } + + /** + * Ctr + * @param collapseContext The collapsing context + * @param sortAndFormats The query sort + * @param numHits The number of collapsed top hits to retrieve. + * @param trackMaxScore True if max score should be tracked + * @param searchAfter The search after value + */ + private CollapsingTopDocsCollectorContext( + CollapseContext collapseContext, + @Nullable SortAndFormats sortAndFormats, + int numHits, + boolean trackMaxScore, + FieldDoc searchAfter ) { super(REASON_SEARCH_TOP_HITS, numHits); assert numHits > 0; @@ -252,10 +271,11 @@ private CollapsingTopDocsCollectorContext( this.sort = sortAndFormats == null ? Sort.RELEVANCE : sortAndFormats.sort; this.sortFmt = sortAndFormats == null ? 
new DocValueFormat[] { DocValueFormat.RAW } : sortAndFormats.formats; this.collapseContext = collapseContext; - this.topDocsCollector = collapseContext.createTopDocs(sort, numHits); + this.searchAfter = searchAfter; + this.topDocsCollector = collapseContext.createTopDocs(sort, numHits, searchAfter); this.trackMaxScore = trackMaxScore; - MaxScoreCollector maxScoreCollector = null; + MaxScoreCollector maxScoreCollector; if (trackMaxScore) { maxScoreCollector = new MaxScoreCollector(); maxScoreSupplier = maxScoreCollector::getMaxScore; @@ -290,7 +310,7 @@ public Collector newCollector() throws IOException { maxScoreCollector = new MaxScoreCollector(); } - return MultiCollectorWrapper.wrap(collapseContext.createTopDocs(sort, numHits), maxScoreCollector); + return MultiCollectorWrapper.wrap(collapseContext.createTopDocs(sort, numHits, searchAfter), maxScoreCollector); } @Override @@ -817,7 +837,13 @@ public static TopDocsCollectorContext createTopDocsCollectorContext(SearchContex } else if (searchContext.collapse() != null) { boolean trackScores = searchContext.sort() == null ? 
true : searchContext.trackScores(); int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs); - return new CollapsingTopDocsCollectorContext(searchContext.collapse(), searchContext.sort(), numDocs, trackScores); + return new CollapsingTopDocsCollectorContext( + searchContext.collapse(), + searchContext.sort(), + numDocs, + trackScores, + searchContext.searchAfter() + ); } else { int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs); final boolean rescore = searchContext.rescore().isEmpty() == false; diff --git a/server/src/test/java/org/opensearch/lucene/grouping/CollapsingTopDocsCollectorTests.java b/server/src/test/java/org/opensearch/lucene/grouping/CollapsingTopDocsCollectorTests.java index 38bc575037924..d86345d7d68f3 100644 --- a/server/src/test/java/org/opensearch/lucene/grouping/CollapsingTopDocsCollectorTests.java +++ b/server/src/test/java/org/opensearch/lucene/grouping/CollapsingTopDocsCollectorTests.java @@ -494,4 +494,180 @@ public void testInconsistentShardIndicesException() { assertEquals("Inconsistent order of shard indices", exception.getMessage()); } + public void testSearchAfterValidation() { + MappedFieldType fieldType = new MockFieldMapper.FakeFieldType("category"); + + // Test multiple sort fields - should fail + Sort multiSort = new Sort(new SortField("category", SortField.Type.INT), new SortField("score", SortField.Type.FLOAT)); + FieldDoc multiAfter = new FieldDoc(0, Float.NaN, new Object[] { 1, 1.0f }); + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> { + CollapsingTopDocsCollector.createNumeric("category", fieldType, multiSort, 10, multiAfter); + }); + assertEquals("The after parameter can only be used when the sort is based on the collapse field", exception.getMessage()); + + // Test wrong sort field - should fail + Sort wrongSort = new Sort(new SortField("different_field", SortField.Type.INT)); + FieldDoc wrongAfter = new FieldDoc(0, Float.NaN, 
new Object[] { 1 }); + exception = expectThrows(IllegalArgumentException.class, () -> { + CollapsingTopDocsCollector.createNumeric("category", fieldType, wrongSort, 10, wrongAfter); + }); + assertEquals("The after parameter can only be used when the sort is based on the collapse field", exception.getMessage()); + + // Test correct sort field - should succeed + Sort correctSort = new Sort(new SortField("category", SortField.Type.INT)); + FieldDoc correctAfter = new FieldDoc(0, Float.NaN, new Object[] { 1 }); + CollapsingTopDocsCollector collector = CollapsingTopDocsCollector.createNumeric( + "category", + fieldType, + correctSort, + 10, + correctAfter + ); + assertNotNull(collector); + + // Test keyword field with multiple sorts - should fail + MappedFieldType keywordFieldType = new MockFieldMapper.FakeFieldType("tag"); + Sort keywordMultiSort = new Sort(new SortField("tag", SortField.Type.STRING), new SortField("score", SortField.Type.FLOAT)); + FieldDoc keywordAfter = new FieldDoc(0, Float.NaN, new Object[] { "A", 1.0f }); + exception = expectThrows(IllegalArgumentException.class, () -> { + CollapsingTopDocsCollector.createKeyword("tag", keywordFieldType, keywordMultiSort, 10, keywordAfter); + }); + assertEquals("The after parameter can only be used when the sort is based on the collapse field", exception.getMessage()); + } + + public void testSearchAfterWithNumericCollapse() throws IOException { + testSearchAfterCollapse(new NumericDVProducer(), true); + } + + public void testSearchAfterWithKeywordCollapse() throws IOException { + testSearchAfterCollapse(new KeywordDVProducer(), false); + } + + private > void testSearchAfterCollapse(CollapsingDocValuesProducer dvProducer, boolean numeric) + throws IOException { + final int numDocs = 100; + final int maxGroup = 10; + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + T groupValue = 
dvProducer.randomGroup(maxGroup); + dvProducer.add(doc, groupValue, false); + w.addDocument(doc); + } + + final IndexReader reader = w.getReader(); + final IndexSearcher searcher = newSearcher(reader); + + SortField collapseField = dvProducer.sortField(false); + // Use collapse field as sort field to satisfy validation + Sort sort = new Sort(collapseField); + MappedFieldType fieldType = new MockFieldMapper.FakeFieldType(collapseField.getField()); + + // First search without search_after + CollapsingTopDocsCollector collector1 = numeric + ? CollapsingTopDocsCollector.createNumeric(collapseField.getField(), fieldType, sort, 5, null) + : CollapsingTopDocsCollector.createKeyword(collapseField.getField(), fieldType, sort, 5, null); + + searcher.search(new MatchAllDocsQuery(), collector1); + CollapseTopFieldDocs results1 = collector1.getTopDocs(); + + assertTrue("Should have results", results1.scoreDocs.length > 0); + + // Use the last result as search_after + FieldDoc after = (FieldDoc) results1.scoreDocs[results1.scoreDocs.length - 1]; + + // Second search with search_after + CollapsingTopDocsCollector collector2 = numeric + ? 
CollapsingTopDocsCollector.createNumeric(collapseField.getField(), fieldType, sort, 5, after) + : CollapsingTopDocsCollector.createKeyword(collapseField.getField(), fieldType, sort, 5, after); + + searcher.search(new MatchAllDocsQuery(), collector2); + CollapseTopFieldDocs results2 = collector2.getTopDocs(); + + // Verify no overlap between pages + Set firstPageDocs = new HashSet<>(); + for (ScoreDoc doc : results1.scoreDocs) { + firstPageDocs.add(doc.doc); + } + + for (ScoreDoc doc : results2.scoreDocs) { + assertFalse("No document should appear in both pages", firstPageDocs.contains(doc.doc)); + } + + w.close(); + reader.close(); + dir.close(); + } + + public void testSearchAfterWithEmptyResults() throws IOException { + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + final int numDocs = 100; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("group", i)); + w.addDocument(doc); + } + + final IndexReader reader = w.getReader(); + final IndexSearcher searcher = newSearcher(reader); + + // Use collapse field as sort field to satisfy validation + Sort sort = new Sort(new SortField("group", SortField.Type.INT)); + MappedFieldType fieldType = new MockFieldMapper.FakeFieldType("group"); + + // Create search_after that's beyond all documents + FieldDoc after = new FieldDoc(0, Float.NaN, new Object[] { 200 }); + + CollapsingTopDocsCollector collector = CollapsingTopDocsCollector.createNumeric("group", fieldType, sort, 10, after); + + searcher.search(new MatchAllDocsQuery(), collector); + CollapseTopFieldDocs results = collector.getTopDocs(); + + assertEquals("Should have no results after last document", 0, results.scoreDocs.length); + assertEquals("Total hits should reflect all documents processed with search_after", numDocs, results.totalHits.value()); + + w.close(); + reader.close(); + dir.close(); + } + + // Helper classes for test data + private static 
class NumericDVProducer implements CollapsingDocValuesProducer { + @Override + public Long randomGroup(int maxGroup) { + return (long) randomIntBetween(0, maxGroup - 1); + } + + @Override + public void add(Document doc, Long value, boolean multivalued) { + doc.add(new NumericDocValuesField("group", value)); + } + + @Override + public SortField sortField(boolean multivalued) { + return new SortField("group", SortField.Type.LONG); + } + } + + private static class KeywordDVProducer implements CollapsingDocValuesProducer { + @Override + public BytesRef randomGroup(int maxGroup) { + return new BytesRef("group" + randomIntBetween(0, maxGroup - 1)); + } + + @Override + public void add(Document doc, BytesRef value, boolean multivalued) { + doc.add(new SortedDocValuesField("group", value)); + } + + @Override + public SortField sortField(boolean multivalued) { + return new SortField("group", SortField.Type.STRING); + } + } + }