From d6edffda00884609451fbc7faea253bec70528bd Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Tue, 5 Aug 2025 11:08:50 -0500 Subject: [PATCH 01/11] add a new sort tiebreaker based on shard id and docid - Implements ShardDocSortBuilder + comparator - TODO: Add unit + integ tests - Registers in SearchModule Signed-off-by: Lamine Idjeraoui --- .../src/main/java/org/opensearch/search/SearchModule.java | 2 ++ .../opensearch/search/searchafter/SearchAfterBuilder.java | 3 +++ .../main/java/org/opensearch/search/sort/SortBuilders.java | 7 +++++++ 3 files changed, 12 insertions(+) diff --git a/server/src/main/java/org/opensearch/search/SearchModule.java b/server/src/main/java/org/opensearch/search/SearchModule.java index f588013b74af2..d7a67836e4247 100644 --- a/server/src/main/java/org/opensearch/search/SearchModule.java +++ b/server/src/main/java/org/opensearch/search/SearchModule.java @@ -268,6 +268,7 @@ import org.opensearch.search.sort.GeoDistanceSortBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.ScriptSortBuilder; +import org.opensearch.search.sort.ShardDocSortBuilder; import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortValue; import org.opensearch.search.suggest.Suggest; @@ -1202,6 +1203,7 @@ private void registerSortParsers(List plugins) { ); registerSort(new SortSpec<>(ScoreSortBuilder.NAME, ScoreSortBuilder::new, ScoreSortBuilder::fromXContent)); registerFromPlugin(plugins, SearchPlugin::getSorts, this::registerSort); + registerSort(new SortSpec<>(ShardDocSortBuilder.NAME, ShardDocSortBuilder::new, ShardDocSortBuilder::fromXContent)); } private void registerIntervalsSourceProviders() { diff --git a/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java b/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java index a45b2bd40c03d..cc67b68c90d7e 100644 --- a/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java +++ b/server/src/main/java/org/opensearch/search/searchafter/SearchAfterBuilder.java @@ -49,6 +49,7 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.search.DocValueFormat; +import org.opensearch.search.sort.ShardDocFieldComparatorSource; import org.opensearch.search.sort.SortAndFormats; import java.io.IOException; @@ -161,6 +162,8 @@ static SortField.Type extractSortType(SortField sortField) { } else if ("LatLonPointSortField".equals(sortField.getClass().getSimpleName())) { // for geo distance sorting return SortField.Type.DOUBLE; + } else if (sortField.getComparatorSource() instanceof ShardDocFieldComparatorSource) { + return SortField.Type.LONG; } else { return sortField.getType(); } diff --git a/server/src/main/java/org/opensearch/search/sort/SortBuilders.java b/server/src/main/java/org/opensearch/search/sort/SortBuilders.java index 209b5b160f30b..729e62d8675ff 100644 --- a/server/src/main/java/org/opensearch/search/sort/SortBuilders.java +++ b/server/src/main/java/org/opensearch/search/sort/SortBuilders.java @@ -100,4 +100,11 @@ public static GeoDistanceSortBuilder geoDistanceSort(String fieldName, GeoPoint. public static GeoDistanceSortBuilder geoDistanceSort(String fieldName, String... geohashes) { return new GeoDistanceSortBuilder(fieldName, geohashes); } + + /** + * Constructs a new shard‐doc tiebreaker sort. 
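+     * Typically added as the last sort together with a point-in-time (PIT) and {@code search_after},
+     * for example {@code SortBuilders.shardDocSort().order(SortOrder.ASC)}.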
+ */ + public static ShardDocSortBuilder shardDocSort() { + return new ShardDocSortBuilder(); + } } From 719934786200dff2a4ace75a393a65d5a5a62ff8 Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Tue, 5 Aug 2025 11:10:52 -0500 Subject: [PATCH 02/11] add a new sort tiebreaker based on shard id and docid Signed-off-by: Lamine Idjeraoui --- .../sort/ShardDocFieldComparatorSource.java | 111 ++++++++++++++++++ .../search/sort/ShardDocSortBuilder.java | 96 +++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java create mode 100644 server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java new file mode 100644 index 0000000000000..4499a509f74d9 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.sort; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.LeafFieldComparator; +import org.apache.lucene.search.Pruning; +import org.apache.lucene.search.Scorable; +import org.apache.lucene.search.SortField; +import org.opensearch.common.util.BigArrays; +import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.search.DocValueFormat; +import org.opensearch.search.MultiValueMode; +import org.opensearch.search.sort.BucketedSort.ExtraData; + +/** + * A pseudo‑field (_shard_doc) comparator that tiebreaks by {@code (shardOrd << 32) | globalDocId} + */ +public class ShardDocFieldComparatorSource extends IndexFieldData.XFieldComparatorSource { + public static final String NAME = "_shard_doc"; + + private final int shardId; + + /** + * @param shardId the ordinal of this shard within the coordinating node’s shard list + */ + public ShardDocFieldComparatorSource(int shardId) { + super(null, MultiValueMode.MIN, null); + this.shardId = shardId; + } + + @Override + public SortField.Type reducedType() { + return SortField.Type.LONG; + } + + @Override + public BucketedSort newBucketedSort(BigArrays bigArrays, SortOrder sortOrder, DocValueFormat format, int bucketSize, ExtraData extra) { + throw new UnsupportedOperationException("bucketed sort not supported for " + NAME); + } + + @Override + public FieldComparator newComparator(String fieldname, int numHits, Pruning pruning, boolean reversed) { + return new FieldComparator() { + private final long[] values = new long[numHits]; + private long bottom; + private long topValue; + + @Override + public LeafFieldComparator getLeafComparator(LeafReaderContext context) { + // derive a stable shard ordinal per-segment + long shardOrd = shardId; + final int docBase = context.docBase; + + return new LeafFieldComparator() { + Scorable scorer; + + @Override + public void setScorer(Scorable scorer) { + this.scorer = scorer; + } + + @Override + public void setBottom(int slot) { + bottom = values[slot]; + } + + @Override + public int compareBottom(int doc) { + long key = ((long) shardId << 32) | (docBase + doc); + return Long.compare(bottom, key); + } + + @Override + public void copy(int 
slot, int doc) { + long key = ((long) shardId << 32) | (docBase + doc); + values[slot] = key; + } + + @Override + public int compareTop(int doc) { + long key = ((long) shardId << 32) | (docBase + doc); + return Long.compare(topValue, key); + } + }; + } + + @Override + public int compare(int slot1, int slot2) { + return Long.compare(values[slot1], values[slot2]); + } + + @Override + public Long value(int slot) { + return values[slot]; + } + + @Override + public void setTopValue(Long value) { + this.topValue = value; + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java new file mode 100644 index 0000000000000..366d21382ceb2 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.sort; + +import org.apache.lucene.search.SortField; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.ObjectParser; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.query.QueryRewriteContext; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.DocValueFormat; + +import java.io.IOException; +import java.util.Objects; + +/** + * Sort builder for the pseudo‐field "_shard_doc", which tiebreaks by {@code (shardOrd << 32) | globalDocId}. + */ +public class ShardDocSortBuilder extends SortBuilder { + + public static final String NAME = "_shard_doc"; + + // parser for JSON: { "_shard_doc": { "order":"asc" } } + private static final ObjectParser PARSER = new ObjectParser<>(NAME, ShardDocSortBuilder::new); + + static { + PARSER.declareString((b, s) -> b.order(SortOrder.fromString(s)), ORDER_FIELD); + } + + public ShardDocSortBuilder() {} + + public ShardDocSortBuilder(StreamInput in) throws IOException { + this.order = SortOrder.readFromStream(in); + + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + order.writeTo(out); + } + + public static ShardDocSortBuilder fromXContent(XContentParser parser, String fieldName) throws IOException { + return PARSER.parse(parser, null); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.field(ORDER_FIELD.getPreferredName(), order); + builder.endObject(); + return builder; + } + + @Override + protected SortFieldAndFormat build(QueryShardContext context) { + final int shardId = context.getShardId(); + SortField sf = new SortField(NAME, new ShardDocFieldComparatorSource(shardId), order == SortOrder.DESC); + return new SortFieldAndFormat(sf, DocValueFormat.RAW); + } + + @Override + public BucketedSort buildBucketedSort(QueryShardContext context, int bucketSize, BucketedSort.ExtraData extra) throws IOException { + throw new UnsupportedOperationException("bucketed sort not supported for " + NAME); + } + + @Override + public ShardDocSortBuilder rewrite(QueryRewriteContext ctx) { + return this; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) 
return true; + if (obj == null || getClass() != obj.getClass()) return false; + ShardDocSortBuilder other = (ShardDocSortBuilder) obj; + return order == other.order; + } + + @Override + public int hashCode() { + return Objects.hash(order); + } +} From 396dd0e4acecdb4b6855efd4609f7cf2f2b38e2b Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Sun, 17 Aug 2025 17:17:46 -0500 Subject: [PATCH 03/11] add a test class plus some refactoring Signed-off-by: Lamine Idjeraoui --- .../sort/ShardDocFieldComparatorSourceIT.java | 214 ++++++++++++++++++ .../sort/ShardDocFieldComparatorSource.java | 23 +- 2 files changed, 225 insertions(+), 12 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java new file mode 100644 index 0000000000000..c4579fc407d8f --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java @@ -0,0 +1,214 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.sort; + + +import org.junit.Before; +import org.junit.Test; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.common.settings.Settings; +import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.ArrayList; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; + +@OpenSearchIntegTestCase.ClusterScope(numDataNodes = 2, supportsDedicatedMasters = false) +public class ShardDocFieldComparatorSourceIT extends OpenSearchIntegTestCase { + + private static final String INDEX = "test_shard_doc"; + + @Before + public void setupIndex() { + createIndex( + INDEX, + Settings.builder() + .put("index.number_of_shards", 2) + .put("index.number_of_replicas", 0).build() + ); + ensureGreen(INDEX); + } + + @Test + public void testEmptyIndex() { + SearchResponse resp = client().prepareSearch(INDEX) + .addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .setSize(10) + .get(); + + // no hits at all + SearchHit[] hits = resp.getHits().getHits(); + assertThat(hits.length, equalTo(0)); + assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)); + } + + @Test + public void testSingleDocument() { + client().prepareIndex(INDEX).setId("42").setSource("foo", "bar").get(); + refresh(); + + SearchResponse resp = client().prepareSearch(INDEX) + .addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .setSize(5) + .get(); + + assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(resp.getHits().getHits()[0].getId(), equalTo("42")); + } + + + @Test + public void testSearchAfterBeyondEndYieldsNoHits() { + indexSequentialDocs(5); + refresh(); + List allKeys = new ArrayList<>(); + SearchSourceBuilder ssb = new SearchSourceBuilder() + .size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)); + SearchResponse resp0 = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + 
// collect first page + for (SearchHit hit : resp0.getHits().getHits()) { + Object[] sv = hit.getSortValues(); + allKeys.add(((Number) sv[0]).longValue()); + } + + long globalMax = allKeys.get(allKeys.size() - 1); + SearchResponse resp = client().prepareSearch(INDEX) + .addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .setSize(3) + .searchAfter(new Object[]{globalMax + 1}) + .get(); + + SearchHit[] hits = resp.getHits().getHits(); + assertThat(hits.length, equalTo(0)); + } + + @Test + public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { + indexSequentialDocs(5); + refresh(); + + // First page: _shard_doc DESC, grab the SMALLEST key (last hit on the page) + SearchSourceBuilder ssb = new SearchSourceBuilder() + .size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)); + SearchResponse first = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + + assertThat(first.getHits().getHits().length, equalTo(5)); + long minKey = ((Number) first.getHits().getHits()[4].getSortValues()[0]).longValue(); // smallest in DESC page + + // Probe strictly beyond the end for DESC: use search_after < min (min - 1) => expect 0 hits + SearchResponse resp = client().prepareSearch(INDEX) + .addSort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + .setSize(3) + .searchAfter(new Object[] { minKey - 1 }) + .get(); + + assertThat(resp.getHits().getHits().length, equalTo(0)); + } + + @Test + public void testPrimaryFieldSortThenShardDocTieBreaker() { + // force ties on primary + for (int i = 1; i <= 30; i++) { + client().prepareIndex(INDEX).setId(Integer.toString(i)).setSource("val", 123).get(); + } + refresh(); + + var shardDocKeys = collectAllSortKeys(10, 1, + new FieldSortBuilder("val").order(SortOrder.ASC), + SortBuilders.shardDocSort().order(SortOrder.ASC)); + + assertThat(shardDocKeys.size(), equalTo(30)); + for (int i = 1; i < shardDocKeys.size(); i++) { + assertThat(shardDocKeys.get(i), greaterThan(shardDocKeys.get(i - 1))); + } + } + + @Test + public void testOrderingAscAndPagination() throws Exception { + assertShardDocOrdering(SortOrder.ASC, 7, 20); + } + + @Test + public void testOrderingDescAndPagination() throws Exception { + assertShardDocOrdering(SortOrder.DESC, 8, 20); + } + + private void assertShardDocOrdering(SortOrder order, int pageSize, int expectedCount) { + indexSequentialDocs(expectedCount); + refresh(); + + // shardDocIndex = 0 because we're only sorting by _shard_doc here + List keys = collectAllSortKeys( + pageSize, + 0, + SortBuilders.shardDocSort().order(order) + ); + + assertThat(keys.size(), equalTo(expectedCount)); + + for (int i = 1; i < keys.size(); i++) { + if (order == SortOrder.ASC) { + assertThat("not strictly increasing at i=" + i, keys.get(i), greaterThan(keys.get(i - 1))); + } else { + assertThat("not strictly decreasing at i=" + i, keys.get(i), lessThan(keys.get(i - 1))); + } + } + } + + // Generic paginator: works for 1 or many sort keys. + // - pageSize: page size + // - shardDocIndex: which position in sortValues[] + // - sorts: the full sort list to apply (e.g., only _shard_doc, or primary then _shard_doc) + private List collectAllSortKeys(int pageSize, int shardDocIndex, SortBuilder... 
sorts) { + List all = new ArrayList<>(); + + SearchSourceBuilder ssb = new SearchSourceBuilder().size(pageSize); + for (var s : sorts) ssb.sort(s); + + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + + while (true) { + for (SearchHit hit : resp.getHits().getHits()) { + Object[] sv = hit.getSortValues(); + all.add(((Number) sv[shardDocIndex]).longValue()); + } + // stop if last page + if (resp.getHits().getHits().length < pageSize) break; + + // use the FULL last sortValues[] as search_after for correctness + Object[] nextAfter = resp.getHits().getHits()[resp.getHits().getHits().length - 1] + .getSortValues(); + + ssb = new SearchSourceBuilder().size(pageSize); + for (var s : sorts) ssb.sort(s); + ssb.searchAfter(nextAfter); + + resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + } + return all; + } + + private void indexSequentialDocs(int count) { + for (int i = 1; i <= count; i++) { + client().prepareIndex(INDEX) + .setId(Integer.toString(i)) + // the content doesn't matter for _shard_doc + .setSource("val", i) + .get(); + } + } +} diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java index 4499a509f74d9..34fa3bd61d1c6 100644 --- a/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java @@ -21,19 +21,19 @@ import org.opensearch.search.sort.BucketedSort.ExtraData; /** - * A pseudo‑field (_shard_doc) comparator that tiebreaks by {@code (shardOrd << 32) | globalDocId} + * A pseudo‑field (_shard_doc) comparator that tiebreaks by {@code (shardId << 32) | globalDocId} */ public class ShardDocFieldComparatorSource extends IndexFieldData.XFieldComparatorSource { public static final String NAME = "_shard_doc"; - private final int shardId; + private final long shardKeyPrefix; /** - * @param shardId the ordinal of this shard within the coordinating node’s shard list + * @param shardId the shard ID of this shard */ public ShardDocFieldComparatorSource(int shardId) { super(null, MultiValueMode.MIN, null); - this.shardId = shardId; + shardKeyPrefix = ((long) shardId) << 32; } @Override @@ -55,8 +55,6 @@ public FieldComparator newComparator(String fieldname, int numHits, Prunin @Override public LeafFieldComparator getLeafComparator(LeafReaderContext context) { - // derive a stable shard ordinal per-segment - long shardOrd = shardId; final int docBase = context.docBase; return new LeafFieldComparator() { @@ -74,20 +72,21 @@ public void setBottom(int slot) { @Override public int compareBottom(int doc) { - long key = ((long) shardId << 32) | (docBase + doc); - return Long.compare(bottom, key); + return Long.compare(bottom, computeGlobalDocKey(doc)); } @Override public void copy(int slot, int doc) { - long key = ((long) shardId << 32) | (docBase + doc); - values[slot] = key; + values[slot] = computeGlobalDocKey(doc); } @Override public int compareTop(int doc) { - long key = ((long) shardId << 32) | (docBase + doc); - return Long.compare(topValue, key); + return Long.compare(topValue, computeGlobalDocKey(doc)); + } + + private long computeGlobalDocKey(int doc) { + return shardKeyPrefix | (docBase + doc); } }; } From 743d2efde7f879faf06d7fccc2029c2ccd771011 Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Thu, 21 Aug 2025 08:42:48 -0500 Subject: [PATCH 04/11] add microbenchmark Signed-off-by: Lamine 
Idjeraoui --- .../sort/ShardDocComparatorBenchmark.java | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java new file mode 100644 index 0000000000000..0b601209c451c --- /dev/null +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java @@ -0,0 +1,139 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.benchmark.search.sort; + + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.Random; +import java.util.concurrent.TimeUnit; + +/** + * JMH microbenchmarks for the _shard_doc composite key path: + * key = (shardKeyPrefix | (docBase + doc)) + * + * Mirrors hot operations in ShardDocFieldComparatorSource without needing Lucene classes. + */ +@Fork(3) +@Warmup(iterations = 5) +@Measurement(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) + +public class ShardDocComparatorBenchmark { + + @Param({"1", "4", "16"}) + public int segments; + + @Param({"50000"}) + public int docsPerSegment; + + @Param({"7"}) + public int shardId; + + private long shardKeyPrefix; + private int[] docBases; + private int[] docs; + private long[] keys; // precomputed composite keys + + // per-doc global doc (docBase + doc) for doc-only baseline + private int[] globalDocs; + + @Setup + public void setup() { + shardKeyPrefix = ((long) shardId) << 32; // Must mirror ShardDocFieldComparatorSource.shardKeyPrefix + + docBases = new int[segments]; + for (int i = 1; i < segments; i++) { + docBases[i] = docBases[i - 1] + docsPerSegment; + } + + int total = segments * docsPerSegment; + docs = new int[total]; + keys = new long[total]; + globalDocs = new int[total]; + + Random r = new Random(42); + int pos = 0; + for (int s = 0; s < segments; s++) { + int base = docBases[s]; + for (int d = 0; d < docsPerSegment; d++) { + int doc = r.nextInt(docsPerSegment); + docs[pos] = doc; + keys[pos] = computeGlobalDocKey(base, doc); + globalDocs[pos] = base + doc; + pos++; + } + } + } + + /** Baseline: compare only globalDoc */ + @Benchmark + public long compareDocOnlyAsc() { + long acc = 0; + for (int i = 1; i < globalDocs.length; i++) { + acc += Integer.compare(globalDocs[i - 1], globalDocs[i]); + } + return acc; + } + + /** raw key packing cost */ + @Benchmark + public void packKey(Blackhole bh) { + int total = segments * docsPerSegment; + int idx = 0; + for (int s = 0; s < segments; s++) { + int base = docBases[s]; + for (int d = 0; d < docsPerSegment; d++) { + long k = computeGlobalDocKey(base, docs[idx++]); + bh.consume(k); + } + } + } + + /** compare already-packed keys as ASC */ + @Benchmark + public long compareAsc() { + long acc = 0; + for (int i = 1; i < keys.length; i++) { + acc += Long.compare(keys[i - 1], keys[i]); + } + return acc; + } + + /** compare already-packed keys as DESC */ + @Benchmark + public long compareDesc() { + long acc = 0; + for (int i = 1; i < keys.length; i++) { + acc += Long.compare(keys[i], keys[i - 1]); // 
reversed + } + return acc; + } + + /** rough “collector loop” mix: copy + occasional compareBottom */ + @Benchmark + public int copyAndCompareBottomAsc() { + long bottom = Long.MIN_VALUE; + int worse = 0; + for (int i = 0; i < keys.length; i++) { + long v = keys[i]; // simulate copy(slot, doc) + if ((i & 31) == 0) bottom = v; // simulate setBottom every 32 items + if (Long.compare(bottom, v) < 0) worse++; + } + return worse; + } + + // Must mirror ShardDocFieldComparatorSource.computeGlobalDocKey: (shardId << 32) | (docBase + doc) + private long computeGlobalDocKey(int docBase, int doc) { + return shardKeyPrefix | (docBase + doc); + } +} From 1889dc7c8c6c589f534c8c2a34295e71f5aef2b1 Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Thu, 21 Aug 2025 10:14:52 -0500 Subject: [PATCH 05/11] spotless Signed-off-by: Lamine Idjeraoui --- .../sort/ShardDocComparatorBenchmark.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java index 0b601209c451c..c94857480b185 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/sort/ShardDocComparatorBenchmark.java @@ -8,8 +8,17 @@ package org.opensearch.benchmark.search.sort; - -import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; import java.util.Random; @@ -30,13 +39,13 @@ public class ShardDocComparatorBenchmark { - @Param({"1", "4", "16"}) + @Param({ "1", "4", "16" }) public int segments; - @Param({"50000"}) + @Param({ "50000" }) public int docsPerSegment; - @Param({"7"}) + @Param({ "7" }) public int shardId; private long shardKeyPrefix; @@ -45,7 +54,7 @@ public class ShardDocComparatorBenchmark { private long[] keys; // precomputed composite keys // per-doc global doc (docBase + doc) for doc-only baseline - private int[] globalDocs; + private int[] globalDocs; @Setup public void setup() { @@ -69,7 +78,7 @@ public void setup() { int doc = r.nextInt(docsPerSegment); docs[pos] = doc; keys[pos] = computeGlobalDocKey(base, doc); - globalDocs[pos] = base + doc; + globalDocs[pos] = base + doc; pos++; } } From 2fde916abd24b0e2905b2e329081001df35017b2 Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Tue, 26 Aug 2025 10:48:08 -0500 Subject: [PATCH 06/11] spotless Signed-off-by: Lamine Idjeraoui --- .../sort/ShardDocFieldComparatorSourceIT.java | 57 +++++++------------ 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java index c4579fc407d8f..df2a83089cd16 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java +++ 
b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java @@ -8,15 +8,14 @@ package org.opensearch.search.sort; - -import org.junit.Before; -import org.junit.Test; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.settings.Settings; import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.Before; +import org.junit.Test; import java.util.ArrayList; import java.util.List; @@ -32,21 +31,13 @@ public class ShardDocFieldComparatorSourceIT extends OpenSearchIntegTestCase { @Before public void setupIndex() { - createIndex( - INDEX, - Settings.builder() - .put("index.number_of_shards", 2) - .put("index.number_of_replicas", 0).build() - ); + createIndex(INDEX, Settings.builder().put("index.number_of_shards", 2).put("index.number_of_replicas", 0).build()); ensureGreen(INDEX); } @Test public void testEmptyIndex() { - SearchResponse resp = client().prepareSearch(INDEX) - .addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)) - .setSize(10) - .get(); + SearchResponse resp = client().prepareSearch(INDEX).addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)).setSize(10).get(); // no hits at all SearchHit[] hits = resp.getHits().getHits(); @@ -59,24 +50,18 @@ public void testSingleDocument() { client().prepareIndex(INDEX).setId("42").setSource("foo", "bar").get(); refresh(); - SearchResponse resp = client().prepareSearch(INDEX) - .addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)) - .setSize(5) - .get(); + SearchResponse resp = client().prepareSearch(INDEX).addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)).setSize(5).get(); assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); assertThat(resp.getHits().getHits()[0].getId(), equalTo("42")); } - @Test public void testSearchAfterBeyondEndYieldsNoHits() { indexSequentialDocs(5); refresh(); List allKeys = new ArrayList<>(); - SearchSourceBuilder ssb = new SearchSourceBuilder() - .size(5) - .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5).sort(SortBuilders.shardDocSort().order(SortOrder.ASC)); SearchResponse resp0 = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); // collect first page for (SearchHit hit : resp0.getHits().getHits()) { @@ -88,7 +73,7 @@ public void testSearchAfterBeyondEndYieldsNoHits() { SearchResponse resp = client().prepareSearch(INDEX) .addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)) .setSize(3) - .searchAfter(new Object[]{globalMax + 1}) + .searchAfter(new Object[] { globalMax + 1 }) .get(); SearchHit[] hits = resp.getHits().getHits(); @@ -101,9 +86,7 @@ public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { refresh(); // First page: _shard_doc DESC, grab the SMALLEST key (last hit on the page) - SearchSourceBuilder ssb = new SearchSourceBuilder() - .size(5) - .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5).sort(SortBuilders.shardDocSort().order(SortOrder.DESC)); SearchResponse first = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); assertThat(first.getHits().getHits().length, equalTo(5)); @@ -127,9 +110,12 @@ public void testPrimaryFieldSortThenShardDocTieBreaker() { } refresh(); - var shardDocKeys = collectAllSortKeys(10, 1, + var shardDocKeys = collectAllSortKeys( + 
10, + 1, new FieldSortBuilder("val").order(SortOrder.ASC), - SortBuilders.shardDocSort().order(SortOrder.ASC)); + SortBuilders.shardDocSort().order(SortOrder.ASC) + ); assertThat(shardDocKeys.size(), equalTo(30)); for (int i = 1; i < shardDocKeys.size(); i++) { @@ -139,7 +125,7 @@ public void testPrimaryFieldSortThenShardDocTieBreaker() { @Test public void testOrderingAscAndPagination() throws Exception { - assertShardDocOrdering(SortOrder.ASC, 7, 20); + assertShardDocOrdering(SortOrder.ASC, 7, 20); } @Test @@ -152,11 +138,7 @@ private void assertShardDocOrdering(SortOrder order, int pageSize, int expectedC refresh(); // shardDocIndex = 0 because we're only sorting by _shard_doc here - List keys = collectAllSortKeys( - pageSize, - 0, - SortBuilders.shardDocSort().order(order) - ); + List keys = collectAllSortKeys(pageSize, 0, SortBuilders.shardDocSort().order(order)); assertThat(keys.size(), equalTo(expectedCount)); @@ -177,7 +159,8 @@ private List collectAllSortKeys(int pageSize, int shardDocIndex, SortBuild List all = new ArrayList<>(); SearchSourceBuilder ssb = new SearchSourceBuilder().size(pageSize); - for (var s : sorts) ssb.sort(s); + for (var s : sorts) + ssb.sort(s); SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); @@ -190,11 +173,11 @@ private List collectAllSortKeys(int pageSize, int shardDocIndex, SortBuild if (resp.getHits().getHits().length < pageSize) break; // use the FULL last sortValues[] as search_after for correctness - Object[] nextAfter = resp.getHits().getHits()[resp.getHits().getHits().length - 1] - .getSortValues(); + Object[] nextAfter = resp.getHits().getHits()[resp.getHits().getHits().length - 1].getSortValues(); ssb = new SearchSourceBuilder().size(pageSize); - for (var s : sorts) ssb.sort(s); + for (var s : sorts) + ssb.sort(s); ssb.searchAfter(nextAfter); resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); From 27449cec23b416fcd8d749dcae0144a9fc91534f Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Sun, 14 Sep 2025 09:25:06 -0500 Subject: [PATCH 07/11] add yaml rest test cases Signed-off-by: Lamine Idjeraoui --- .../test/search/95_search_after_shard_doc.yml | 241 ++++++++++++++++++ .../sort/ShardDocFieldComparatorSourceIT.java | 8 - .../action/search/SearchRequest.java | 39 +++ .../sort/ShardDocFieldComparatorSource.java | 2 +- .../search/sort/ShardDocSortBuilder.java | 7 +- .../action/search/SearchRequestTests.java | 67 +++++ 6 files changed, 352 insertions(+), 12 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml new file mode 100644 index 0000000000000..7cf5dee46b76e --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml @@ -0,0 +1,241 @@ +--- +setup: + # Common tiny index used by some validation checks + - do: + indices.create: + index: sharddoc_index + body: + settings: + number_of_shards: 2 + number_of_replicas: 0 + - do: + cluster.health: + wait_for_status: green + index: sharddoc_index + + # Multi-shard dataset for happy path + pagination (helps exercise multi-node when available) + - do: + indices.create: + index: sharddoc_paging + body: + settings: + number_of_shards: 4 + number_of_replicas: 0 + mappings: + properties: + id: { type: integer } + txt: { type: keyword } + - 
do: + bulk: + refresh: true + index: sharddoc_paging + body: | + {"index":{}} + {"id":1,"txt":"a"} + {"index":{}} + {"id":2,"txt":"b"} + {"index":{}} + {"id":3,"txt":"c"} + {"index":{}} + {"id":4,"txt":"d"} + {"index":{}} + {"id":5,"txt":"e"} + {"index":{}} + {"id":6,"txt":"f"} + {"index":{}} + {"id":7,"txt":"g"} + {"index":{}} + {"id":8,"txt":"h"} + {"index":{}} + {"id":9,"txt":"i"} + {"index":{}} + {"id":10,"txt":"j"} + {"index":{}} + {"id":11,"txt":"k"} + {"index":{}} + {"id":12,"txt":"l"} + + {"index":{}} + {"id":13,"txt":"m"} + {"index":{}} + {"id":14,"txt":"n"} + {"index":{}} + {"id":15,"txt":"o"} + {"index":{}} + {"id":16,"txt":"p"} + {"index":{}} + {"id":17,"txt":"q"} + + {"index":{}} + {"id":18,"txt":"r"} + {"index":{}} + {"id":19,"txt":"s"} + {"index":{}} + {"id":20,"txt":"t"} + {"index":{}} + {"id":21,"txt":"u"} + {"index":{}} + {"id":22,"txt":"v"} + +# ------------------------------------------------------------------- +# VALIDATION +# ------------------------------------------------------------------- + +--- +"reject _shard_doc without PIT": + - do: + catch: bad_request + search: + index: sharddoc_index + body: + sort: + - _shard_doc + - match: { status: 400 } + - match: { error.type: action_request_validation_exception } + - match: { error.reason: "/.*_shard_doc is only supported with point-in-time.*|.*PIT.*/" } + +--- +"detect _shard_doc via FieldSortBuilder-style object without PIT": + - do: + catch: bad_request + search: + index: sharddoc_index + body: + sort: + - _shard_doc: { } # object form, still invalid without PIT + - match: { status: 400 } + - match: { error.type: action_request_validation_exception } + - match: { error.reason: "/.*_shard_doc is only supported with point-in-time.*|.*PIT.*/" } + + + +# ------------------------------------------------------------------- +# HAPPY PATH: PAGINATION WITH PIT ON MULTI-SHARD INDEX +# ------------------------------------------------------------------- + +--- +"accept _shard_doc with PIT + paginate with search_after (multi-shard)": + - do: + create_pit: + index: sharddoc_paging + keep_alive: 1m + - set: { pit_id: pit_id } + + # Page 1 + - do: + search: + body: + size: 5 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: {} + - match: { _shards.failed: 0 } + - length: { hits.hits: 5 } + - is_true: hits.hits.0.sort + - is_true: hits.hits.1.sort + - is_true: hits.hits.2.sort + - is_true: hits.hits.3.sort + - is_true: hits.hits.4.sort + + - set: { hits.hits.4.sort: after1 } + + # Check that the sort values increase from one hit to the next without ever decreasing. 
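+      # (each _shard_doc value packs the shard id into the upper 32 bits and the global doc id into the
+      #  lower 32 bits, so within a single PIT the keys returned in ascending order are strictly increasing)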
+ - set: { hits.hits.0.sort.0: prev } + - gt: { hits.hits.1.sort.0: $prev } + + - set: { hits.hits.1.sort.0: prev } + - gt: { hits.hits.2.sort.0: $prev } + + - set: { hits.hits.2.sort.0: prev } + - gt: { hits.hits.3.sort.0: $prev } + + - set: { hits.hits.3.sort.0: prev } + - gt: { hits.hits.4.sort.0: $prev } + + # Page 2 + - do: + search: + body: + size: 5 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: { } + - match: { _shards.failed: 0 } + - length: { hits.hits: 5 } + - is_true: hits.hits.4.sort + + - set: { hits.hits.4.sort: after2 } + - set: { hits.hits.4.sort.0: last_value_page2 } + + + # Page 3 + - do: + search: + body: + size: 5 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: {} + search_after: $after2 + - match: { _shards.failed: 0 } + - length: { hits.hits: 5 } + - is_true: hits.hits.0.sort + - set: { hits.hits.4.sort: after3 } + + - set: { hits.hits.0.sort.0 : first_value_page3 } + - set: { hits.hits.4.sort.0 : last_value_page3 } + - gt: { $first_value_page3: $last_value_page2 } + + # Page 4 + - do: + search: + body: + size: 5 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: {} + search_after: $after3 + - match: { _shards.failed: 0 } + - length: { hits.hits: 5 } + - is_true: hits.hits.0.sort + - set: { hits.hits.4.sort: after4 } + + - set: { hits.hits.0.sort.0 : first_value_page4 } + - set: { hits.hits.4.sort.0 : last_value_page4 } + - gt: { $first_value_page4: $last_value_page3 } + + # Page 5 + - do: + search: + body: + size: 5 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: {} + search_after: $after4 + - match: { _shards.failed: 0 } + - length: { hits.hits: 5 } + - is_true: hits.hits.0.sort + - set: { hits.hits.4.sort: after5 } + + - set: { hits.hits.0.sort.0 : first_value_page5 } + - set: { hits.hits.4.sort.0 : last_value_page5 } + - gt: { $first_value_page5: $last_value_page4 } + + # Page 6: drain the rest (22 docs total => 5 + 5 + 5 + 5 + 2) + - do: + search: + body: + size: 5 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: {} + search_after: $after5 + - match: { _shards.failed: 0 } + - length: { hits.hits: 2 } + - is_true: hits.hits.0.sort + + - do: + delete_pit: + body: + pit_id: [ "$pit_id" ] diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java index df2a83089cd16..ab5131756f34e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java @@ -15,7 +15,6 @@ import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.Before; -import org.junit.Test; import java.util.ArrayList; import java.util.List; @@ -35,7 +34,6 @@ public void setupIndex() { ensureGreen(INDEX); } - @Test public void testEmptyIndex() { SearchResponse resp = client().prepareSearch(INDEX).addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)).setSize(10).get(); @@ -45,7 +43,6 @@ public void testEmptyIndex() { assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)); } - @Test public void testSingleDocument() { client().prepareIndex(INDEX).setId("42").setSource("foo", "bar").get(); refresh(); @@ -56,7 +53,6 @@ public void testSingleDocument() { assertThat(resp.getHits().getHits()[0].getId(), equalTo("42")); } - @Test public void 
testSearchAfterBeyondEndYieldsNoHits() { indexSequentialDocs(5); refresh(); @@ -80,7 +76,6 @@ public void testSearchAfterBeyondEndYieldsNoHits() { assertThat(hits.length, equalTo(0)); } - @Test public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { indexSequentialDocs(5); refresh(); @@ -102,7 +97,6 @@ public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { assertThat(resp.getHits().getHits().length, equalTo(0)); } - @Test public void testPrimaryFieldSortThenShardDocTieBreaker() { // force ties on primary for (int i = 1; i <= 30; i++) { @@ -123,12 +117,10 @@ public void testPrimaryFieldSortThenShardDocTieBreaker() { } } - @Test public void testOrderingAscAndPagination() throws Exception { assertShardDocOrdering(SortOrder.ASC, 7, 20); } - @Test public void testOrderingDescAndPagination() throws Exception { assertShardDocOrdering(SortOrder.DESC, 8, 20); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 4a4a309b45a2e..a1e6e7605cbdb 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -51,6 +51,9 @@ import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.ShardDocSortBuilder; +import org.opensearch.search.sort.SortBuilder; import org.opensearch.transport.client.Client; import org.opensearch.transport.client.Requests; @@ -349,9 +352,45 @@ public ActionRequestValidationException validate() { validationException = addValidationError("using [point in time] is not allowed in a scroll context", validationException); } } + + // _shard_doc validation + if (source != null && source.sorts() != null && !source.sorts().isEmpty()) { + int shardDocCount = 0; + for (SortBuilder sb : source.sorts()) { + if (isShardDocSort(sb)) shardDocCount++; + } + final boolean hasPit = pointInTimeBuilder() != null; + + if (shardDocCount > 0 && scroll) { + validationException = addValidationError( + "_shard_doc cannot be used with scroll. Use PIT + search_after instead.", + validationException + ); + } + if (shardDocCount > 0 && !hasPit) { + validationException = addValidationError( + "_shard_doc is only supported with point-in-time (PIT). Add a PIT or remove _shard_doc.", + validationException + ); + } + if (shardDocCount > 1) { + validationException = addValidationError( + "duplicate _shard_doc sort detected. Specify it at most once.", + validationException + ); + } + } return validationException; } + private static boolean isShardDocSort(SortBuilder sb) { + if (sb instanceof ShardDocSortBuilder) return true; + if (sb instanceof FieldSortBuilder) { + return ShardDocSortBuilder.NAME.equals(((FieldSortBuilder) sb).getFieldName()); + } + return false; + } + /** * Returns the alias of the cluster that this search request is being executed on. A non-null value indicates that this search request * is being executed as part of a locally reduced cross-cluster search request. 
The cluster alias is used to prefix index names diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java index 34fa3bd61d1c6..47a71fd2fd772 100644 --- a/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocFieldComparatorSource.java @@ -86,7 +86,7 @@ public int compareTop(int doc) { } private long computeGlobalDocKey(int doc) { - return shardKeyPrefix | (docBase + doc); + return shardKeyPrefix | (docBase + doc); } }; } diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java index 366d21382ceb2..71c9906252381 100644 --- a/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java @@ -22,7 +22,7 @@ import java.util.Objects; /** - * Sort builder for the pseudo‐field "_shard_doc", which tiebreaks by {@code (shardOrd << 32) | globalDocId}. + * Sort builder for the pseudo‐field "_shard_doc", which tiebreaks by {@code (shardId << 32) | globalDocId}. */ public class ShardDocSortBuilder extends SortBuilder { @@ -35,11 +35,12 @@ public class ShardDocSortBuilder extends SortBuilder { PARSER.declareString((b, s) -> b.order(SortOrder.fromString(s)), ORDER_FIELD); } - public ShardDocSortBuilder() {} + public ShardDocSortBuilder() { + this.order = SortOrder.ASC; // default to ASC + } public ShardDocSortBuilder(StreamInput in) throws IOException { this.order = SortOrder.readFromStream(in); - } @Override diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java index acda1445bacbb..a8c5b768711e8 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java @@ -50,6 +50,10 @@ import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.search.rescore.QueryRescorerBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.ShardDocSortBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.test.rest.FakeRestRequest; @@ -238,6 +242,69 @@ public void testValidate() throws IOException { assertEquals(1, validationErrors.validationErrors().size()); assertEquals("using [point in time] is not allowed in a scroll context", validationErrors.validationErrors().get(0)); } + { + // _shard_doc without PIT -> reject + SearchRequest searchRequest = new SearchRequest().source(new SearchSourceBuilder().sort(SortBuilders.shardDocSort())); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals( + "_shard_doc is only supported with point-in-time (PIT). 
Add a PIT or remove _shard_doc.", + e.validationErrors().get(0) + ); + } + { + // _shard_doc with scroll -> reject (even if PIT is present, scroll is illegal) + SearchRequest searchRequest = new SearchRequest().source( + // include PIT to mirror real usage; scroll + PIT is already invalid, but we assert shard_doc+scroll error is present + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")).sort(SortBuilders.shardDocSort()) + ).scroll(TimeValue.timeValueSeconds(30)); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertTrue( + "Expected shard_doc + scroll error", + e.validationErrors().contains("_shard_doc cannot be used with scroll. Use PIT + search_after instead.") + ); + } + { + // Duplicate _shard_doc with PIT -> reject + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(SortBuilders.shardDocSort()) // first + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) // second + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals("duplicate _shard_doc sort detected. Specify it at most once.", e.validationErrors().get(0)); + } + { + // Smuggled as FieldSortBuilder("_shard_doc") without PIT -> reject + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().sort(new FieldSortBuilder(ShardDocSortBuilder.NAME)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals( + "_shard_doc is only supported with point-in-time (PIT). Add a PIT or remove _shard_doc.", + e.validationErrors().get(0) + ); + } + { + // Good: PIT + _shard_doc -> valid + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")).sort(SortBuilders.shardDocSort()) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNull("PIT + _shard_doc should be valid", e); + } + { + // Control: no PIT, no _shard_doc -> valid + SearchRequest searchRequest = new SearchRequest().source(new SearchSourceBuilder().sort("_id", SortOrder.ASC)); + ActionRequestValidationException e = searchRequest.validate(); + assertNull(e); + } } public void testCopyConstructor() throws IOException { From f7d8f73beb2f06b85590bc70de319a7f11a86fb3 Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Sun, 14 Sep 2025 10:01:11 -0500 Subject: [PATCH 08/11] change log Signed-off-by: Lamine Idjeraoui --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f329adcd7650a..7838c958fd5dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Publish transport-grpc-spi exposing QueryBuilderProtoConverter and QueryBuilderProtoConverterRegistry ([#18949](https://github.com/opensearch-project/OpenSearch/pull/18949)) - Support system generated search pipeline. 
([#19128](https://github.com/opensearch-project/OpenSearch/pull/19128)) - Add `epoch_micros` date format ([#14669](https://github.com/opensearch-project/OpenSearch/issues/14669)) -- Grok processor supports capturing multiple values for same field name ([#18799](https://github.com/opensearch-project/OpenSearch/pull/18799) +- Grok processor supports capturing multiple values for same field name ([#18799](https://github.com/opensearch-project/OpenSearch/pull/18799)) +- Add support for search tie-breaking by _shard_doc ([#18924](https://github.com/opensearch-project/OpenSearch/pull/18924)) ### Changed - Refactor `if-else` chains to use `Java 17 pattern matching switch expressions`(([#18965](https://github.com/opensearch-project/OpenSearch/pull/18965)) From 9467d068c3ee91a7f1a4a426c53300b8f8212d99 Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Mon, 15 Sep 2025 09:05:31 -0500 Subject: [PATCH 09/11] some changes to the shard_doc yaml rest test file Signed-off-by: Lamine Idjeraoui --- .../test/search/95_search_after_shard_doc.yml | 138 ++++++------------ 1 file changed, 44 insertions(+), 94 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml index 7cf5dee46b76e..89e6cc5979ea4 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/95_search_after_shard_doc.yml @@ -1,19 +1,10 @@ --- setup: - # Common tiny index used by some validation checks - - do: - indices.create: - index: sharddoc_index - body: - settings: - number_of_shards: 2 - number_of_replicas: 0 - - do: - cluster.health: - wait_for_status: green - index: sharddoc_index + - skip: + version: " - 3.2.99" + reason: "introduced in 3.3.0" - # Multi-shard dataset for happy path + pagination (helps exercise multi-node when available) + # Multi-shard index - do: indices.create: index: sharddoc_paging @@ -25,6 +16,10 @@ setup: properties: id: { type: integer } txt: { type: keyword } + - do: + cluster.health: + wait_for_status: green + index: sharddoc_paging - do: bulk: refresh: true @@ -54,7 +49,6 @@ setup: {"id":11,"txt":"k"} {"index":{}} {"id":12,"txt":"l"} - {"index":{}} {"id":13,"txt":"m"} {"index":{}} @@ -65,7 +59,6 @@ setup: {"id":16,"txt":"p"} {"index":{}} {"id":17,"txt":"q"} - {"index":{}} {"id":18,"txt":"r"} {"index":{}} @@ -86,7 +79,7 @@ setup: - do: catch: bad_request search: - index: sharddoc_index + index: sharddoc_paging body: sort: - _shard_doc @@ -99,7 +92,7 @@ setup: - do: catch: bad_request search: - index: sharddoc_index + index: sharddoc_paging body: sort: - _shard_doc: { } # object form, still invalid without PIT @@ -108,7 +101,6 @@ setup: - match: { error.reason: "/.*_shard_doc is only supported with point-in-time.*|.*PIT.*/" } - # ------------------------------------------------------------------- # HAPPY PATH: PAGINATION WITH PIT ON MULTI-SHARD INDEX # ------------------------------------------------------------------- @@ -125,19 +117,32 @@ setup: - do: search: body: - size: 5 + size: 10 pit: { id: "$pit_id", keep_alive: "1m" } sort: - _shard_doc: {} - match: { _shards.failed: 0 } - - length: { hits.hits: 5 } - - is_true: hits.hits.0.sort - - is_true: hits.hits.1.sort - - is_true: hits.hits.2.sort - - is_true: hits.hits.3.sort - - is_true: hits.hits.4.sort + - length: { hits.hits: 10 } + - is_true: hits.hits.9.sort - - set: { hits.hits.4.sort: after1 } + - 
set: { hits.hits.9.sort: after1 } + + # Page 2 + - do: + search: + body: + size: 10 + pit: { id: "$pit_id", keep_alive: "1m" } + sort: + - _shard_doc: { } + search_after: $after1 + + - match: { _shards.failed: 0 } + - length: { hits.hits: 10 } + - is_true: hits.hits.9.sort + + - set: { hits.hits.9.sort: after2 } + - set: { hits.hits.9.sort.0: last_value_page2 } # Check that the sort values increase from one hit to the next without ever decreasing. - set: { hits.hits.0.sort.0: prev } @@ -152,88 +157,33 @@ setup: - set: { hits.hits.3.sort.0: prev } - gt: { hits.hits.4.sort.0: $prev } - # Page 2 - - do: - search: - body: - size: 5 - pit: { id: "$pit_id", keep_alive: "1m" } - sort: - - _shard_doc: { } - - match: { _shards.failed: 0 } - - length: { hits.hits: 5 } - - is_true: hits.hits.4.sort - - - set: { hits.hits.4.sort: after2 } - - set: { hits.hits.4.sort.0: last_value_page2 } + - set: { hits.hits.4.sort.0: prev } + - gt: { hits.hits.5.sort.0: $prev } + - set: { hits.hits.5.sort.0: prev } + - gt: { hits.hits.6.sort.0: $prev } - # Page 3 - - do: - search: - body: - size: 5 - pit: { id: "$pit_id", keep_alive: "1m" } - sort: - - _shard_doc: {} - search_after: $after2 - - match: { _shards.failed: 0 } - - length: { hits.hits: 5 } - - is_true: hits.hits.0.sort - - set: { hits.hits.4.sort: after3 } + - set: { hits.hits.6.sort.0: prev } + - gt: { hits.hits.7.sort.0: $prev } - - set: { hits.hits.0.sort.0 : first_value_page3 } - - set: { hits.hits.4.sort.0 : last_value_page3 } - - gt: { $first_value_page3: $last_value_page2 } + - set: { hits.hits.7.sort.0: prev } + - gt: { hits.hits.8.sort.0: $prev } - # Page 4 - - do: - search: - body: - size: 5 - pit: { id: "$pit_id", keep_alive: "1m" } - sort: - - _shard_doc: {} - search_after: $after3 - - match: { _shards.failed: 0 } - - length: { hits.hits: 5 } - - is_true: hits.hits.0.sort - - set: { hits.hits.4.sort: after4 } - - - set: { hits.hits.0.sort.0 : first_value_page4 } - - set: { hits.hits.4.sort.0 : last_value_page4 } - - gt: { $first_value_page4: $last_value_page3 } + - set: { hits.hits.8.sort.0: prev } + - gt: { hits.hits.9.sort.0: $prev } - # Page 5 + # Page 3: drain the rest (22 docs total => 10 + 10 + 2) - do: search: body: - size: 5 + size: 10 pit: { id: "$pit_id", keep_alive: "1m" } sort: - _shard_doc: {} - search_after: $after4 - - match: { _shards.failed: 0 } - - length: { hits.hits: 5 } - - is_true: hits.hits.0.sort - - set: { hits.hits.4.sort: after5 } - - - set: { hits.hits.0.sort.0 : first_value_page5 } - - set: { hits.hits.4.sort.0 : last_value_page5 } - - gt: { $first_value_page5: $last_value_page4 } + search_after: $after2 - # Page 6: drain the rest (22 docs total => 5 + 5 + 5 + 5 + 2) - - do: - search: - body: - size: 5 - pit: { id: "$pit_id", keep_alive: "1m" } - sort: - - _shard_doc: {} - search_after: $after5 - match: { _shards.failed: 0 } - length: { hits.hits: 2 } - - is_true: hits.hits.0.sort - do: delete_pit: From efa56c96100fdc79c79488f283e08322d545cdfb Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Sun, 21 Sep 2025 10:56:14 -0500 Subject: [PATCH 10/11] Add PIT param to ShardDocFieldComparatorSourceIT's search requests Signed-off-by: Lamine Idjeraoui --- .../sort/ShardDocFieldComparatorSourceIT.java | 110 +++++++++++++----- 1 file changed, 82 insertions(+), 28 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java index ab5131756f34e..b546fc95c9e11 
100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java @@ -8,15 +8,24 @@ package org.opensearch.search.sort; +import org.opensearch.action.search.CreatePitAction; +import org.opensearch.action.search.CreatePitRequest; +import org.opensearch.action.search.CreatePitResponse; +import org.opensearch.action.search.DeletePitAction; +import org.opensearch.action.search.DeletePitRequest; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; +import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.search.SearchHit; +import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.Before; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import static org.hamcrest.Matchers.equalTo; @@ -34,30 +43,45 @@ public void setupIndex() { ensureGreen(INDEX); } - public void testEmptyIndex() { - SearchResponse resp = client().prepareSearch(INDEX).addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)).setSize(10).get(); + public void testEmptyIndex() throws Exception { + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); // no hits at all SearchHit[] hits = resp.getHits().getHits(); assertThat(hits.length, equalTo(0)); assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)); + closePit(pitId); } - public void testSingleDocument() { + public void testSingleDocument() throws Exception { client().prepareIndex(INDEX).setId("42").setSource("foo", "bar").get(); refresh(); - SearchResponse resp = client().prepareSearch(INDEX).addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)).setSize(5).get(); + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); assertThat(resp.getHits().getHits()[0].getId(), equalTo("42")); + closePit(pitId); } - public void testSearchAfterBeyondEndYieldsNoHits() { + public void testSearchAfterBeyondEndYieldsNoHits() throws Exception { indexSequentialDocs(5); refresh(); List allKeys = new ArrayList<>(); - SearchSourceBuilder ssb = new SearchSourceBuilder().size(5).sort(SortBuilders.shardDocSort().order(SortOrder.ASC)); + + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse resp0 = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); // collect first page for (SearchHit hit : resp0.getHits().getHits()) { @@ -66,14 +90,16 @@ public void testSearchAfterBeyondEndYieldsNoHits() { } long globalMax = allKeys.get(allKeys.size() - 1); - SearchResponse resp = 
client().prepareSearch(INDEX) - .addSort(SortBuilders.shardDocSort().order(SortOrder.ASC)) - .setSize(3) - .searchAfter(new Object[] { globalMax + 1 }) - .get(); + SearchSourceBuilder next = new SearchSourceBuilder().size(3) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(new Object[] { globalMax + 1 }); + + SearchResponse resp = client().search(new SearchRequest(INDEX).source(next)).actionGet(); SearchHit[] hits = resp.getHits().getHits(); assertThat(hits.length, equalTo(0)); + closePit(pitId); } public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { @@ -81,30 +107,36 @@ public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { refresh(); // First page: _shard_doc DESC, grab the SMALLEST key (last hit on the page) - SearchSourceBuilder ssb = new SearchSourceBuilder().size(5).sort(SortBuilders.shardDocSort().order(SortOrder.DESC)); - SearchResponse first = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse first = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); assertThat(first.getHits().getHits().length, equalTo(5)); - long minKey = ((Number) first.getHits().getHits()[4].getSortValues()[0]).longValue(); // smallest in DESC page // Probe strictly beyond the end for DESC: use search_after < min (min - 1) => expect 0 hits - SearchResponse resp = client().prepareSearch(INDEX) - .addSort(SortBuilders.shardDocSort().order(SortOrder.DESC)) - .setSize(3) - .searchAfter(new Object[] { minKey - 1 }) - .get(); + long minKey = ((Number) first.getHits().getHits()[4].getSortValues()[0]).longValue(); // smallest in DESC page + SearchSourceBuilder probe = new SearchSourceBuilder().size(3) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(new Object[] { minKey - 1 }); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(probe)).actionGet(); assertThat(resp.getHits().getHits().length, equalTo(0)); + closePit(pitId); } - public void testPrimaryFieldSortThenShardDocTieBreaker() { + public void testPrimaryFieldSortThenShardDocTieBreaker() throws Exception { // force ties on primary for (int i = 1; i <= 30; i++) { client().prepareIndex(INDEX).setId(Integer.toString(i)).setSource("val", 123).get(); } refresh(); + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); var shardDocKeys = collectAllSortKeys( + pitId, 10, 1, new FieldSortBuilder("val").order(SortOrder.ASC), @@ -115,6 +147,7 @@ public void testPrimaryFieldSortThenShardDocTieBreaker() { for (int i = 1; i < shardDocKeys.size(); i++) { assertThat(shardDocKeys.get(i), greaterThan(shardDocKeys.get(i - 1))); } + closePit(pitId); } public void testOrderingAscAndPagination() throws Exception { @@ -125,12 +158,13 @@ public void testOrderingDescAndPagination() throws Exception { assertShardDocOrdering(SortOrder.DESC, 8, 20); } - private void assertShardDocOrdering(SortOrder order, int pageSize, int expectedCount) { + private void assertShardDocOrdering(SortOrder order, int pageSize, int expectedCount) throws Exception { indexSequentialDocs(expectedCount); refresh(); + String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); // shardDocIndex = 0 because we're only sorting by _shard_doc here - List keys = 
collectAllSortKeys(pageSize, 0, SortBuilders.shardDocSort().order(order)); + List keys = collectAllSortKeys(pitId, pageSize, 0, SortBuilders.shardDocSort().order(order)); assertThat(keys.size(), equalTo(expectedCount)); @@ -141,19 +175,20 @@ private void assertShardDocOrdering(SortOrder order, int pageSize, int expectedC assertThat("not strictly decreasing at i=" + i, keys.get(i), lessThan(keys.get(i - 1))); } } + closePit(pitId); } // Generic paginator: works for 1 or many sort keys. // - pageSize: page size // - shardDocIndex: which position in sortValues[] // - sorts: the full sort list to apply (e.g., only _shard_doc, or primary then _shard_doc) - private List collectAllSortKeys(int pageSize, int shardDocIndex, SortBuilder... sorts) { + private List collectAllSortKeys(String pitId, int pageSize, int shardDocIndex, SortBuilder... sorts) { List all = new ArrayList<>(); - SearchSourceBuilder ssb = new SearchSourceBuilder().size(pageSize); - for (var s : sorts) + SearchSourceBuilder ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); + for (var s : sorts) { ssb.sort(s); - + } SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); while (true) { @@ -167,9 +202,10 @@ private List collectAllSortKeys(int pageSize, int shardDocIndex, SortBuild // use the FULL last sortValues[] as search_after for correctness Object[] nextAfter = resp.getHits().getHits()[resp.getHits().getHits().length - 1].getSortValues(); - ssb = new SearchSourceBuilder().size(pageSize); - for (var s : sorts) + ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); + for (var s : sorts) { ssb.sort(s); + } ssb.searchAfter(nextAfter); resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); @@ -186,4 +222,22 @@ private void indexSequentialDocs(int count) { .get(); } } + + private String openPit(String index, TimeValue keepAlive) throws Exception { + CreatePitRequest request = new CreatePitRequest(keepAlive, true); + request.setIndices(new String[] { index }); + ActionFuture execute = client().execute(CreatePitAction.INSTANCE, request); + CreatePitResponse pitResponse = execute.get(); + return pitResponse.getId(); + } + + private void closePit(String pitId) { + if (pitId == null) return; + DeletePitRequest del = new DeletePitRequest(Collections.singletonList(pitId)); + client().execute(DeletePitAction.INSTANCE, del).actionGet(); + } + + private static PointInTimeBuilder pit(String pitId) { + return new PointInTimeBuilder(pitId).setKeepAlive(TimeValue.timeValueMinutes(1)); + } } From 60f438dade0d222f343a9b3a5c9f0e48bd68510a Mon Sep 17 00:00:00 2001 From: Lamine Idjeraoui Date: Fri, 26 Sep 2025 18:48:36 -0500 Subject: [PATCH 11/11] add more logic for shard_doc parsing add more test cases Signed-off-by: Lamine Idjeraoui --- .../sort/ShardDocFieldComparatorSourceIT.java | 472 ++++++++++++++---- .../search/sort/ShardDocSortBuilder.java | 34 +- .../opensearch/search/sort/SortBuilder.java | 2 + .../action/search/SearchRequestTests.java | 82 ++- .../search/sort/ShardDocSortBuilderTests.java | 120 +++++ 5 files changed, 600 insertions(+), 110 deletions(-) create mode 100644 server/src/test/java/org/opensearch/search/sort/ShardDocSortBuilderTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java index b546fc95c9e11..33cd5d42bf4d7 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/sort/ShardDocFieldComparatorSourceIT.java @@ -26,10 +26,12 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThan; @OpenSearchIntegTestCase.ClusterScope(numDataNodes = 2, supportsDedicatedMasters = false) @@ -45,31 +47,38 @@ public void setupIndex() { public void testEmptyIndex() throws Exception { String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); - SearchSourceBuilder ssb = new SearchSourceBuilder().size(10) - .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) - .pointInTimeBuilder(pit(pitId)); - SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); - - // no hits at all - SearchHit[] hits = resp.getHits().getHits(); - assertThat(hits.length, equalTo(0)); - assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)); - closePit(pitId); + try { + SearchSourceBuilder ssb = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + + // no hits at all + SearchHit[] hits = resp.getHits().getHits(); + assertThat(hits.length, equalTo(0)); + assertThat(resp.getHits().getTotalHits().value(), equalTo(0L)); + } finally { + closePit(pitId); + } } public void testSingleDocument() throws Exception { client().prepareIndex(INDEX).setId("42").setSource("foo", "bar").get(); refresh(); - String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); - SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) - .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) - .pointInTimeBuilder(pit(pitId)); - SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); - - assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); - assertThat(resp.getHits().getHits()[0].getId(), equalTo("42")); - closePit(pitId); + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + + assertThat(resp.getHits().getTotalHits().value(), equalTo(1L)); + assertThat(resp.getHits().getHits()[0].getId(), equalTo("42")); + } finally { + closePit(pitId); + } } public void testSearchAfterBeyondEndYieldsNoHits() throws Exception { @@ -77,54 +86,64 @@ public void testSearchAfterBeyondEndYieldsNoHits() throws Exception { refresh(); List allKeys = new ArrayList<>(); - String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); - SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) - .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) - .pointInTimeBuilder(pit(pitId)); + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); - SearchResponse resp0 = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); - // 
collect first page - for (SearchHit hit : resp0.getHits().getHits()) { - Object[] sv = hit.getSortValues(); - allKeys.add(((Number) sv[0]).longValue()); - } + SearchResponse resp0 = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + // collect first page + for (SearchHit hit : resp0.getHits().getHits()) { + Object[] sv = hit.getSortValues(); + allKeys.add(((Number) sv[0]).longValue()); + } - long globalMax = allKeys.get(allKeys.size() - 1); + long globalMax = allKeys.get(allKeys.size() - 1); - SearchSourceBuilder next = new SearchSourceBuilder().size(3) - .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) - .pointInTimeBuilder(pit(pitId)) - .searchAfter(new Object[] { globalMax + 1 }); + SearchSourceBuilder next = new SearchSourceBuilder().size(3) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(new Object[] { globalMax + 1 }); - SearchResponse resp = client().search(new SearchRequest(INDEX).source(next)).actionGet(); - SearchHit[] hits = resp.getHits().getHits(); - assertThat(hits.length, equalTo(0)); - closePit(pitId); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(next)).actionGet(); + SearchHit[] hits = resp.getHits().getHits(); + assertThat(hits.length, equalTo(0)); + + } finally { + closePit(pitId); + } } public void testSearchAfterBeyondEndYieldsNoHits_DESC() throws Exception { indexSequentialDocs(5); refresh(); - // First page: _shard_doc DESC, grab the SMALLEST key (last hit on the page) - String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); - SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) - .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) - .pointInTimeBuilder(pit(pitId)); - - SearchResponse first = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); - assertThat(first.getHits().getHits().length, equalTo(5)); - - // Probe strictly beyond the end for DESC: use search_after < min (min - 1) => expect 0 hits - long minKey = ((Number) first.getHits().getHits()[4].getSortValues()[0]).longValue(); // smallest in DESC page - SearchSourceBuilder probe = new SearchSourceBuilder().size(3) - .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) - .pointInTimeBuilder(pit(pitId)) - .searchAfter(new Object[] { minKey - 1 }); - - SearchResponse resp = client().search(new SearchRequest(INDEX).source(probe)).actionGet(); - assertThat(resp.getHits().getHits().length, equalTo(0)); - closePit(pitId); + String pitId = null; + try { + // First page: _shard_doc DESC, grab the SMALLEST key (last hit on the page) + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(5) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + .pointInTimeBuilder(pit(pitId)); + + SearchResponse first = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + assertThat(first.getHits().getHits().length, equalTo(5)); + + // Probe strictly beyond the end for DESC: use search_after < min (min - 1) => expect 0 hits + long minKey = ((Number) first.getHits().getHits()[4].getSortValues()[0]).longValue(); // smallest in DESC page + SearchSourceBuilder probe = new SearchSourceBuilder().size(3) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(new Object[] { minKey - 1 }); + + SearchResponse resp = client().search(new SearchRequest(INDEX).source(probe)).actionGet(); + assertThat(resp.getHits().getHits().length, equalTo(0)); + + } finally { + 
closePit(pitId); + } } public void testPrimaryFieldSortThenShardDocTieBreaker() throws Exception { @@ -134,83 +153,275 @@ public void testPrimaryFieldSortThenShardDocTieBreaker() throws Exception { } refresh(); - String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); - var shardDocKeys = collectAllSortKeys( - pitId, - 10, - 1, - new FieldSortBuilder("val").order(SortOrder.ASC), - SortBuilders.shardDocSort().order(SortOrder.ASC) - ); - - assertThat(shardDocKeys.size(), equalTo(30)); - for (int i = 1; i < shardDocKeys.size(); i++) { - assertThat(shardDocKeys.get(i), greaterThan(shardDocKeys.get(i - 1))); + List shardDocKeys = new ArrayList<>(); + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + collectIdsAndSortKeys( + INDEX, + pitId, + 10, + 1, + null, + shardDocKeys, + new FieldSortBuilder("val").order(SortOrder.ASC), + SortBuilders.shardDocSort().order(SortOrder.ASC) + ); + + assertThat(shardDocKeys.size(), equalTo(30)); + for (int i = 1; i < shardDocKeys.size(); i++) { + assertThat(shardDocKeys.get(i), greaterThan(shardDocKeys.get(i - 1))); + } + } finally { + closePit(pitId); } - closePit(pitId); } public void testOrderingAscAndPagination() throws Exception { - assertShardDocOrdering(SortOrder.ASC, 7, 20); + assertShardDocOrdering(SortOrder.ASC); } public void testOrderingDescAndPagination() throws Exception { - assertShardDocOrdering(SortOrder.DESC, 8, 20); + assertShardDocOrdering(SortOrder.DESC); } - private void assertShardDocOrdering(SortOrder order, int pageSize, int expectedCount) throws Exception { - indexSequentialDocs(expectedCount); + private void assertShardDocOrdering(SortOrder order) throws Exception { + int pageSize = randomIntBetween(5, 23); + int totalDocs = randomIntBetween(73, 187); + indexSequentialDocs(totalDocs); + refresh(); + + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + List shardDocKeys = new ArrayList<>(); + // shardDocIndex = 0 because we're only sorting by _shard_doc here + collectIdsAndSortKeys(INDEX, pitId, pageSize, 0, null, shardDocKeys, SortBuilders.shardDocSort().order(order)); + + assertThat(shardDocKeys.size(), equalTo(totalDocs)); + + for (int i = 1; i < shardDocKeys.size(); i++) { + if (order == SortOrder.ASC) { + assertThat("not strictly increasing at i=" + i, shardDocKeys.get(i), greaterThan(shardDocKeys.get(i - 1))); + } else { + assertThat("not strictly decreasing at i=" + i, shardDocKeys.get(i), lessThan(shardDocKeys.get(i - 1))); + } + } + + } finally { + closePit(pitId); + } + } + + public void testPageLocalMonotonicity_ASC() throws Exception { + indexSequentialDocs(20); refresh(); String pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); - // shardDocIndex = 0 because we're only sorting by _shard_doc here - List keys = collectAllSortKeys(pitId, pageSize, 0, SortBuilders.shardDocSort().order(order)); + SearchSourceBuilder ssb = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + + SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + SearchHit[] hits = resp.getHits().getHits(); + for (int i = 1; i < hits.length; i++) { + long prev = ((Number) hits[i - 1].getSortValues()[0]).longValue(); + long cur = ((Number) hits[i].getSortValues()[0]).longValue(); + assertThat("regression at i=" + i, cur, greaterThan(prev)); + } + closePit(pitId); + } + + // No duplicates across the whole scan (ASC & DESC). 
+ public void testNoDuplicatesAcrossScan_ASC_DESC() throws Exception { + indexSequentialDocs(123); + refresh(); + + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + List idsAsc = new ArrayList<>(); + List shardDocKeys = new ArrayList<>(); + + // ASC + collectIdsAndSortKeys(INDEX, pitId, 13, 0, idsAsc, shardDocKeys, SortBuilders.shardDocSort().order(SortOrder.ASC)); + assertThat(idsAsc.size(), equalTo(123)); + assertThat(new HashSet<>(idsAsc).size(), equalTo(idsAsc.size())); + + // DESC + List idsDesc = new ArrayList<>(); + collectIdsAndSortKeys(INDEX, pitId, 17, 0, idsDesc, shardDocKeys, SortBuilders.shardDocSort().order(SortOrder.DESC)); + assertThat(idsDesc.size(), equalTo(123)); + assertThat(new HashSet<>(idsDesc).size(), equalTo(idsDesc.size())); + } finally { + closePit(pitId); + } + } + + // Resume from the middle of a page (ASC). + public void testResumeFromMiddleOfPage_ASC() throws Exception { + indexSequentialDocs(60); + refresh(); - assertThat(keys.size(), equalTo(expectedCount)); + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); - for (int i = 1; i < keys.size(); i++) { - if (order == SortOrder.ASC) { - assertThat("not strictly increasing at i=" + i, keys.get(i), greaterThan(keys.get(i - 1))); - } else { - assertThat("not strictly decreasing at i=" + i, keys.get(i), lessThan(keys.get(i - 1))); + // First page to pick a middle anchor + SearchSourceBuilder firstPage = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse r1 = client().search(new SearchRequest(INDEX).source(firstPage)).actionGet(); + assertThat(r1.getHits().getHits().length, equalTo(10)); + + int mid = 4; + Object[] midSort = r1.getHits().getHits()[mid].getSortValues(); + + // Collect IDs = first page up to 'mid' (inclusive), then resume from mid sort tuple + List ids = new ArrayList<>(); + for (int i = 0; i <= mid; i++) { + ids.add(r1.getHits().getHits()[i].getId()); + } + + SearchSourceBuilder resume = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(midSort); + SearchResponse resp = client().search(new SearchRequest(INDEX).source(resume)).actionGet(); + + while (true) { + SearchHit[] hits = resp.getHits().getHits(); + // should start strictly after the anchor + for (SearchHit h : hits) + ids.add(h.getId()); + if (hits.length < 10) break; + Object[] after = hits[hits.length - 1].getSortValues(); + + resume = new SearchSourceBuilder().size(10) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(after); + resp = client().search(new SearchRequest(INDEX).source(resume)).actionGet(); } + + // Should cover all 60 docs exactly once + assertThat(ids.size(), equalTo(60)); + assertThat(new HashSet<>(ids).size(), equalTo(60)); + } finally { + closePit(pitId); } - closePit(pitId); } - // Generic paginator: works for 1 or many sort keys. - // - pageSize: page size - // - shardDocIndex: which position in sortValues[] - // - sorts: the full sort list to apply (e.g., only _shard_doc, or primary then _shard_doc) - private List collectAllSortKeys(String pitId, int pageSize, int shardDocIndex, SortBuilder... sorts) { - List all = new ArrayList<>(); + // Tiny page sizes (size=1 and size=2) with strict monotonicity & no dupes. 
+ public void testTinyPageSizes_ASC() throws Exception { + indexSequentialDocs(41); + refresh(); - SearchSourceBuilder ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); - for (var s : sorts) { - ssb.sort(s); + for (int pageSize : new int[] { 1, 2 }) { + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + List keys = new ArrayList<>(); + collectIdsAndSortKeys(INDEX, pitId, pageSize, 0, null, keys, SortBuilders.shardDocSort().order(SortOrder.ASC)); + + assertThat(keys.size(), equalTo(41)); + for (int i = 1; i < keys.size(); i++) { + assertThat(keys.get(i), greaterThan(keys.get(i - 1))); + } + } finally { + closePit(pitId); + } } - SearchResponse resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + } - while (true) { - for (SearchHit hit : resp.getHits().getHits()) { - Object[] sv = hit.getSortValues(); - all.add(((Number) sv[shardDocIndex]).longValue()); + // Replicas enabled: still strict order and no dupes. + public void testWithReplicasEnabled_ASC() throws Exception { + final String repIdx = INDEX + "_repl"; + createIndex(repIdx, Settings.builder().put("index.number_of_shards", 3).put("index.number_of_replicas", 1).build()); + ensureGreen(repIdx); + + for (int i = 1; i <= 100; i++) { + client().prepareIndex(repIdx).setId(Integer.toString(i)).setSource("v", i).get(); + } + refresh(repIdx); + + String pitId = null; + try { + pitId = openPit(repIdx, TimeValue.timeValueMinutes(1)); + List keys = new ArrayList<>(); + List ids = new ArrayList<>(); + collectIdsAndSortKeys(repIdx, pitId, 11, 0, ids, keys, SortBuilders.shardDocSort().order(SortOrder.ASC)); + assertThat(keys.size(), equalTo(100)); + for (int i = 1; i < keys.size(); i++) { + assertThat(keys.get(i), greaterThan(keys.get(i - 1))); } - // stop if last page - if (resp.getHits().getHits().length < pageSize) break; + // also IDs unique + // List ids = collectAllIds(repIdx, pitId, 11, SortBuilders.shardDocSort().order(SortOrder.ASC)); + assertThat(new HashSet<>(ids).size(), equalTo(ids.size())); + } finally { + closePit(pitId); + } + } - // use the FULL last sortValues[] as search_after for correctness - Object[] nextAfter = resp.getHits().getHits()[resp.getHits().getHits().length - 1].getSortValues(); + // Boundary equality: using the exact last sort tuple as search_after should not duplicate the boundary doc. 
+ public void testBoundaryEqualityNoOverlap_ASC() throws Exception { + indexSequentialDocs(30); + refresh(); - ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); - for (var s : sorts) { - ssb.sort(s); + String pitId = null; + try { + pitId = openPit(INDEX, TimeValue.timeValueMinutes(1)); + + SearchSourceBuilder p1 = new SearchSourceBuilder().size(7) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)); + SearchResponse r1 = client().search(new SearchRequest(INDEX).source(p1)).actionGet(); + SearchHit[] hits1 = r1.getHits().getHits(); + SearchHit lastOfPage1 = hits1[hits1.length - 1]; + + SearchSourceBuilder p2 = new SearchSourceBuilder().size(7) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .pointInTimeBuilder(pit(pitId)) + .searchAfter(lastOfPage1.getSortValues()); + SearchResponse r2 = client().search(new SearchRequest(INDEX).source(p2)).actionGet(); + SearchHit[] hits2 = r2.getHits().getHits(); + + if (hits2.length > 0) { + assertNotEquals("no overlap with boundary", lastOfPage1.getId(), hits2[0].getId()); } - ssb.searchAfter(nextAfter); + } finally { + closePit(pitId); + } + } - resp = client().search(new SearchRequest(INDEX).source(ssb)).actionGet(); + // Large corpus, odd page sizes, multi-shard interleaving stress. + public void testLargeCorpusInterleaving_ASC() throws Exception { + final String bigIdx = INDEX + "_big"; + createIndex(bigIdx, Settings.builder().put("index.number_of_shards", 5).put("index.number_of_replicas", 1).build()); + ensureGreen(TimeValue.timeValueSeconds(60), bigIdx); + + for (int i = 1; i <= 2000; i++) { + client().prepareIndex(bigIdx).setId(Integer.toString(i)).setSource("v", i).get(); + } + refresh(bigIdx); + + String pitId = null; + try { + pitId = openPit(bigIdx, TimeValue.timeValueMinutes(1)); + // odd page sizes to stress boundaries + int[] sizes = new int[] { 13, 17, 19, 23, 31 }; + for (int sz : sizes) { + List keys = new ArrayList<>(); + // shardDocIndex=0 since only shard_doc is sorted + collectIdsAndSortKeys(INDEX, pitId, sz, 0, null, keys, SortBuilders.shardDocSort().order(SortOrder.ASC)); + assertThat(keys.size(), equalTo(2000)); + for (int i = 1; i < keys.size(); i++) { + assertThat(keys.get(i), greaterThan(keys.get(i - 1))); + } + } + } finally { + closePit(pitId); } - return all; } private void indexSequentialDocs(int count) { @@ -240,4 +451,51 @@ private void closePit(String pitId) { private static PointInTimeBuilder pit(String pitId) { return new PointInTimeBuilder(pitId).setKeepAlive(TimeValue.timeValueMinutes(1)); } + + // Generic paginator: works for 1 or many sort keys. + // - pageSize: page size + // - shardDocIndex: which position in sortValues[] + // - sorts: the full sort list to apply (e.g., only _shard_doc, or primary then _shard_doc) + private void collectIdsAndSortKeys( + String index, + String pitId, + int pageSize, + int shardDocIndex, + List ids, + List keys, + SortBuilder... 
sorts + ) { + SearchSourceBuilder ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); + for (var s : sorts) { + ssb.sort(s); + } + SearchResponse resp = client().search(new SearchRequest(index).source(ssb)).actionGet(); + + while (true) { + SearchHit[] hits = resp.getHits().getHits(); + for (SearchHit hit : hits) { + Object[] sv = hit.getSortValues(); + assertNotNull("every hit must have sort", sv); + assertTrue("shard_doc should be present", shardDocIndex < sv.length); + assertThat("sort key must be a Long", sv[shardDocIndex], instanceOf(Long.class)); + long k = (Long) sv[shardDocIndex]; + keys.add(k); + if (ids != null) { + ids.add(hit.getId()); + } + } + // stop if last page + if (hits.length < pageSize) break; + + // use the FULL last sortValues[] as search_after for correctness + Object[] nextAfter = hits[hits.length - 1].getSortValues(); + ssb = new SearchSourceBuilder().size(pageSize).pointInTimeBuilder(pit(pitId)); + for (var s : sorts) { + ssb.sort(s); + } + ssb.searchAfter(nextAfter); + + resp = client().search(new SearchRequest(index).source(ssb)).actionGet(); + } + } } diff --git a/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java index 71c9906252381..99cebaeed3474 100644 --- a/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java +++ b/server/src/main/java/org/opensearch/search/sort/ShardDocSortBuilder.java @@ -43,20 +43,52 @@ public ShardDocSortBuilder(StreamInput in) throws IOException { this.order = SortOrder.readFromStream(in); } + public ShardDocSortBuilder(ShardDocSortBuilder other) { + this.order = other.order; + } + @Override public void writeTo(StreamOutput out) throws IOException { order.writeTo(out); } public static ShardDocSortBuilder fromXContent(XContentParser parser, String fieldName) throws IOException { - return PARSER.parse(parser, null); + XContentParser.Token token = parser.currentToken(); + if (token == XContentParser.Token.FIELD_NAME) { + token = parser.nextToken(); + } + + switch (token) { + case START_OBJECT: + return PARSER.parse(parser, null); // { "_shard_doc": { "order": "asc" } } + + case VALUE_STRING: + case VALUE_NUMBER: + case VALUE_BOOLEAN: + case VALUE_NULL: + return new ShardDocSortBuilder(); // Scalar shorthand: "_shard_doc" → defaults to ASC + + case START_ARRAY: + throw new org.opensearch.core.xcontent.XContentParseException( + parser.getTokenLocation(), + "[" + NAME + "] Expected START_OBJECT or scalar but was: START_ARRAY" + ); + + default: + throw new org.opensearch.core.xcontent.XContentParseException( + parser.getTokenLocation(), + "[" + NAME + "] Expected START_OBJECT or scalar but was: " + token + ); + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); builder.startObject(NAME); builder.field(ORDER_FIELD.getPreferredName(), order); builder.endObject(); + builder.endObject(); return builder; } diff --git a/server/src/main/java/org/opensearch/search/sort/SortBuilder.java b/server/src/main/java/org/opensearch/search/sort/SortBuilder.java index a8c21e7311061..e3d83e6a8102e 100644 --- a/server/src/main/java/org/opensearch/search/sort/SortBuilder.java +++ b/server/src/main/java/org/opensearch/search/sort/SortBuilder.java @@ -135,6 +135,8 @@ public static List> fromXContent(XContentParser parser) throws IO private static SortBuilder fieldOrScoreSort(String fieldName) { if (fieldName.equals(ScoreSortBuilder.NAME)) { 
return new ScoreSortBuilder(); + } else if (fieldName.equals(ShardDocSortBuilder.NAME)) { // ShardDocSortBuilder is a special "field" sort + return new ShardDocSortBuilder(); } else { return new FieldSortBuilder(fieldName); } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java index a8c5b768711e8..23a84737ec5df 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java @@ -67,6 +67,7 @@ import static org.opensearch.action.search.SearchType.DFS_QUERY_THEN_FETCH; import static org.opensearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasItems; import static org.mockito.Mockito.mock; public class SearchRequestTests extends AbstractSearchTestCase { @@ -266,6 +267,19 @@ public void testValidate() throws IOException { e.validationErrors().contains("_shard_doc cannot be used with scroll. Use PIT + search_after instead.") ); } + { + // Smuggled as FieldSortBuilder("_shard_doc") without PIT -> reject + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().sort(new FieldSortBuilder(ShardDocSortBuilder.NAME)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals( + "_shard_doc is only supported with point-in-time (PIT). Add a PIT or remove _shard_doc.", + e.validationErrors().get(0) + ); + } { // Duplicate _shard_doc with PIT -> reject SearchRequest searchRequest = new SearchRequest().source( @@ -279,9 +293,33 @@ public void testValidate() throws IOException { assertEquals("duplicate _shard_doc sort detected. Specify it at most once.", e.validationErrors().get(0)); } { - // Smuggled as FieldSortBuilder("_shard_doc") without PIT -> reject + // Duplicate detection insensitive to order differences (ASC + DESC is still duplicate) SearchRequest searchRequest = new SearchRequest().source( - new SearchSourceBuilder().sort(new FieldSortBuilder(ShardDocSortBuilder.NAME)) + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(SortBuilders.shardDocSort().order(SortOrder.ASC)) + .sort(SortBuilders.shardDocSort().order(SortOrder.DESC)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals("duplicate _shard_doc sort detected. Specify it at most once.", e.validationErrors().get(0)); + } + { + // Duplicate via mixed builders: ShardDocSortBuilder + FieldSortBuilder("_shard_doc") -> reject + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(SortBuilders.shardDocSort()) + .sort(new FieldSortBuilder(ShardDocSortBuilder.NAME)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertEquals(1, e.validationErrors().size()); + assertEquals("duplicate _shard_doc sort detected. 
Specify it at most once.", e.validationErrors().get(0)); + } + { + // _shard_doc + search_after but NO PIT -> reject (explicitly exercising this combo) + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().sort(SortBuilders.shardDocSort()).searchAfter(new Object[] { 1L }) ); ActionRequestValidationException e = searchRequest.validate(); assertNotNull(e); @@ -291,6 +329,46 @@ public void testValidate() throws IOException { e.validationErrors().get(0) ); } + { + // Error aggregation with SCROLL: collect multiple errors including shard_doc+scroll + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) // PIT + scroll already illegal + .sort(SortBuilders.shardDocSort()) // shard_doc + scroll = illegal + .from(10) // also illegal with scroll + .size(0) // also illegal with scroll + ).scroll(TimeValue.timeValueSeconds(30)); + + ActionRequestValidationException e = searchRequest.validate(); + assertNotNull(e); + assertThat( + e.validationErrors(), + hasItems( + "using [point in time] is not allowed in a scroll context", + "_shard_doc cannot be used with scroll. Use PIT + search_after instead.", + "using [from] is not allowed in a scroll context", + "[size] cannot be [0] in a scroll context" + ) + ); + } + { + // PIT present + search_after but NO _shard_doc -> valid (control for PIT/search_after) + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort("_id", SortOrder.ASC) + .searchAfter(new Object[] { "x" }) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNull(e); + } + { + // FieldSortBuilder("_shard_doc") WITH PIT -> valid (object-style declared as FieldSort) + SearchRequest searchRequest = new SearchRequest().source( + new SearchSourceBuilder().pointInTimeBuilder(new PointInTimeBuilder("id")) + .sort(new FieldSortBuilder(ShardDocSortBuilder.NAME).order(SortOrder.ASC)) + ); + ActionRequestValidationException e = searchRequest.validate(); + assertNull(e); + } { // Good: PIT + _shard_doc -> valid SearchRequest searchRequest = new SearchRequest().source( diff --git a/server/src/test/java/org/opensearch/search/sort/ShardDocSortBuilderTests.java b/server/src/test/java/org/opensearch/search/sort/ShardDocSortBuilderTests.java new file mode 100644 index 0000000000000..7f026f249f8f4 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/sort/ShardDocSortBuilderTests.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.sort; + +import org.apache.lucene.search.SortField; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParseException; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.search.DocValueFormat; +import org.opensearch.search.builder.SearchSourceBuilder; + +import java.io.IOException; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.instanceOf; + +public class ShardDocSortBuilderTests extends AbstractSortTestCase { + + @Override + protected ShardDocSortBuilder createTestItem() { + ShardDocSortBuilder b = new ShardDocSortBuilder(); + if (randomBoolean()) { + b.order(randomFrom(SortOrder.values())); + } + return b; + } + + @Override + protected ShardDocSortBuilder mutate(ShardDocSortBuilder original) throws IOException { + // only mutates order; builder is intentionally tiny + ShardDocSortBuilder copy = new ShardDocSortBuilder(original); + copy.order(original.order() == SortOrder.ASC ? SortOrder.DESC : SortOrder.ASC); + return copy; + } + + @Override + protected void sortFieldAssertions(ShardDocSortBuilder builder, SortField sortField, DocValueFormat format) throws IOException { + assertEquals(SortField.Type.CUSTOM, sortField.getType()); + assertEquals(builder.order() == SortOrder.DESC, sortField.getReverse()); + assertEquals(DocValueFormat.RAW, format); + } + + @Override + protected ShardDocSortBuilder fromXContent(XContentParser parser, String fieldName) throws IOException { + return ShardDocSortBuilder.fromXContent(parser, fieldName); + } + + public void testParseScalarAndObject() throws IOException { + String json = " [ { \"_shard_doc\": { \"order\": \"desc\" } } ] "; + XContentParser parser = createParser(JsonXContent.jsonXContent, json); + parser.nextToken(); + SortBuilder sb = SortBuilder.fromXContent(parser).get(0); + assertThat(sb, instanceOf(ShardDocSortBuilder.class)); + assertEquals(SortOrder.DESC, sb.order()); + + json = " [ { \"_shard_doc\": { \"order\": \"asc\" } } ] "; + parser = createParser(JsonXContent.jsonXContent, json); + parser.nextToken(); + sb = SortBuilder.fromXContent(parser).get(0); + assertThat(sb, instanceOf(ShardDocSortBuilder.class)); + assertEquals(SortOrder.ASC, sb.order()); + + json = " [ \"_shard_doc\" ] "; // default to asc + parser = createParser(JsonXContent.jsonXContent, json); + parser.nextToken(); + sb = SortBuilder.fromXContent(parser).get(0); + assertThat(sb, instanceOf(ShardDocSortBuilder.class)); + assertEquals(SortOrder.ASC, sb.order()); + + // from ShardDocSortBuilder + json = "{ \"_shard_doc\": { \"order\": \"desc\" } }"; + try (XContentParser p = createParser(JsonXContent.jsonXContent, json)) { + p.nextToken(); + p.nextToken(); + p.nextToken(); + ShardDocSortBuilder b = ShardDocSortBuilder.fromXContent(p, "_shard_doc"); + assertEquals(SortOrder.DESC, b.order()); + } + } + + public void testUnknownOptionFails() throws IOException { + String json = "{ \"_shard_doc\": { \"reverse\": true } }"; + try (XContentParser p = createParser(JsonXContent.jsonXContent, json)) { + p.nextToken(); + p.nextToken(); + p.nextToken(); + XContentParseException e = expectThrows(XContentParseException.class, () -> ShardDocSortBuilder.fromXContent(p, "_shard_doc")); + assertThat(e.getMessage(), containsString("unknown field [reverse]")); + } + } + + public 
void testXContentRoundTripAndSerialization() throws IOException { + ShardDocSortBuilder original = new ShardDocSortBuilder().order(SortOrder.DESC); + // XContent round-trip + XContentBuilder builder = JsonXContent.contentBuilder(); + original.toXContent(builder, ToXContent.EMPTY_PARAMS); + String rendered = builder.toString(); + SearchSourceBuilder ssb = SearchSourceBuilder.fromXContent( + createParser(JsonXContent.jsonXContent, "{ \"sort\": [" + rendered + "] }") + ); + SortBuilder parsed = ssb.sorts().get(0); + assertEquals(original, parsed); + assertEquals(original.hashCode(), parsed.hashCode()); + + // Stream serialization + BytesStreamOutput out = new BytesStreamOutput(); + original.writeTo(out); + ShardDocSortBuilder restored = new ShardDocSortBuilder(out.bytes().streamInput()); + assertEquals(original, restored); + } +}
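
For reference, the sketch below condenses the end-to-end flow the integration tests above exercise: open a PIT, sort on a primary field with _shard_doc as the tiebreaker, page with search_after using the full sort tuple, and release the PIT when done. It is a minimal illustration rather than part of the patch: it assumes it lives in an OpenSearchIntegTestCase subclass (so client() is available and the imports match those used in ShardDocFieldComparatorSourceIT), and the method name, the "timestamp" field, and the page size of 100 are placeholders.

    // Illustrative only: drains all document ids from 'index' in a stable, deterministic order.
    private List<String> pageAllIdsWithShardDocTiebreaker(String index) throws Exception {
        // 1. Open a point in time so every page sees the same snapshot of shards.
        CreatePitRequest pitRequest = new CreatePitRequest(TimeValue.timeValueMinutes(1), true);
        pitRequest.setIndices(new String[] { index });
        CreatePitResponse pitResponse = client().execute(CreatePitAction.INSTANCE, pitRequest).get();
        String pitId = pitResponse.getId();

        List<String> ids = new ArrayList<>();
        try {
            Object[] searchAfter = null;
            while (true) {
                // 2. Primary sort on a user field ("timestamp" is a placeholder); _shard_doc breaks ties deterministically.
                SearchSourceBuilder source = new SearchSourceBuilder().size(100)
                    .sort(new FieldSortBuilder("timestamp").order(SortOrder.ASC))
                    .sort(SortBuilders.shardDocSort().order(SortOrder.ASC))
                    .pointInTimeBuilder(new PointInTimeBuilder(pitId).setKeepAlive(TimeValue.timeValueMinutes(1)));
                if (searchAfter != null) {
                    source.searchAfter(searchAfter);
                }
                SearchResponse resp = client().search(new SearchRequest(index).source(source)).actionGet();
                SearchHit[] hits = resp.getHits().getHits();
                for (SearchHit hit : hits) {
                    ids.add(hit.getId());
                }
                if (hits.length < 100) {
                    break; // short page => no more results
                }
                // 3. Resume strictly after the last hit using its FULL sort tuple (field value + _shard_doc key).
                searchAfter = hits[hits.length - 1].getSortValues();
            }
        } finally {
            // 4. Always release the PIT.
            client().execute(DeletePitAction.INSTANCE, new DeletePitRequest(Collections.singletonList(pitId))).actionGet();
        }
        return ids;
    }

Resuming from the full sort tuple rather than the _shard_doc key alone is what keeps pagination correct when the primary field has ties, which is the scenario testPrimaryFieldSortThenShardDocTieBreaker covers.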