Skip to content

Commit 40e9e40

Browse files
vishdivs and Divyansh Sharma authored
Support for IndexSort in Nested Fields (#18536)
* Support for IndexSort in Nested Fields Signed-off-by: Divyansh Sharma <[email protected]> * Updating the New Feature to V3.2.0 Signed-off-by: Divyansh Sharma <[email protected]> * Addressing comments and adding Unit test Signed-off-by: Divyansh Sharma <[email protected]> * Adding unit test with merge segments Signed-off-by: Divyansh Sharma <[email protected]> * Fixing rest-api-spec test and flaky test Signed-off-by: Divyansh Sharma <[email protected]> --------- Signed-off-by: Divyansh Sharma <[email protected]> Co-authored-by: Divyansh Sharma <[email protected]>
1 parent cdbb76e commit 40e9e40

File tree

10 files changed

+515
-8
lines changed

10 files changed

+515
-8
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yml

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,106 @@
176176
# This should fail with 400 as half_float is not supported for index sort
177177
- match: { status: 400 }
178178
- match: { error.type: illegal_argument_exception }
179+
180+
---
# Creating an index that is sorted on a top-level field must succeed even when
# the mapping also contains a nested field, and a search sorted on that field
# must return the documents in the requested order.
"Index sort with nested fields":
  - skip:
      # The feature is gated on index-created version 3.2.0, so every 3.1.x
      # release (not only 3.1.0) has to be skipped.
      version: " - 3.1.99"
      reason: "Index sort with nested fields is only supported from 3.2.0"
  - do:
      indices.create:
        index: test_nested_sort
        body:
          settings:
            number_of_shards: 1
            number_of_replicas: 0
            index.sort.field: foo
            index.sort.order: desc
          mappings:
            properties:
              foo:
                type: integer
              foo1:
                type: keyword
              contacts:
                type: nested
                properties:
                  name:
                    type: keyword
                  age:
                    type: integer

  - do:
      index:
        index: test_nested_sort
        id: "1"
        body:
          foo: 100
          foo1: "A"
          contacts:
            - name: "Alice"
              age: 30

  - do:
      index:
        index: test_nested_sort
        id: "2"
        body:
          foo: 200
          foo1: "B"
          contacts:
            - name: "Bob"
              age: 40

  - do:
      index:
        index: test_nested_sort
        id: "3"
        body:
          foo: 150
          foo1: "C"
          contacts:
            - name: "Charlie"
              age: 25

  - do:
      indices.refresh:
        index: test_nested_sort

  - do:
      search:
        index: test_nested_sort
        body:
          sort:
            - foo: desc
          size: 3

  # Expected order by foo descending: 200 (id 2), 150 (id 3), 100 (id 1).
  - match: { hits.total.value: 3 }
  - match: { hits.hits.0._id: "2" }
  - match: { hits.hits.1._id: "3" }
  - match: { hits.hits.2._id: "1" }
258+
---
# Using a field that lives inside a nested object as the index sort field must
# be rejected at index creation time with a 400.
"Index sort with nested field as sort field validation":
  - skip:
      # The feature is gated on index-created version 3.2.0, so every 3.1.x
      # release (not only 3.1.0) has to be skipped.
      version: " - 3.1.99"
      reason: "Index sort with nested fields is only supported from 3.2.0"
  - do:
      catch: bad_request
      indices.create:
        index: test_nested_sort
        body:
          settings:
            number_of_shards: 1
            number_of_replicas: 0
            index.sort.field: contacts.age
          mappings:
            properties:
              contacts:
                type: nested
                properties:
                  age:
                    type: integer
  - match: { status: 400 }
  - match: { error.type: illegal_argument_exception }
  - match: { error.reason: "index sorting on nested fields is not supported: found nested sort field [contacts.age] in [test_nested_sort]" }

server/src/internalClusterTest/java/org/opensearch/index/IndexSortIT.java

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,22 +36,34 @@
3636

3737
import org.apache.lucene.search.Sort;
3838
import org.apache.lucene.search.SortField;
39+
import org.apache.lucene.search.SortedNumericSelector;
3940
import org.apache.lucene.search.SortedNumericSortField;
4041
import org.apache.lucene.search.SortedSetSortField;
42+
import org.opensearch.action.search.SearchResponse;
43+
import org.opensearch.common.collect.Tuple;
4144
import org.opensearch.common.settings.Settings;
4245
import org.opensearch.core.xcontent.XContentBuilder;
46+
import org.opensearch.index.query.QueryBuilders;
47+
import org.opensearch.search.sort.SortOrder;
4348
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;
4449

4550
import java.io.IOException;
51+
import java.util.ArrayList;
4652
import java.util.Arrays;
4753
import java.util.Collection;
54+
import java.util.Collections;
55+
import java.util.HashMap;
56+
import java.util.List;
57+
import java.util.Map;
58+
import java.util.UUID;
4859

4960
import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
5061
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
5162
import static org.hamcrest.Matchers.containsString;
5263

5364
public class IndexSortIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {
5465
private static final XContentBuilder TEST_MAPPING = createTestMapping();
66+
private static final XContentBuilder NESTED_TEST_MAPPING = createNestedTestMapping();
5567

5668
public IndexSortIT(Settings staticSettings) {
5769
super(staticSettings);
@@ -95,6 +107,49 @@ private static XContentBuilder createTestMapping() {
95107
}
96108
}
97109

110+
private static XContentBuilder createNestedTestMapping() {
111+
try {
112+
return jsonBuilder().startObject()
113+
.startObject("properties")
114+
.startObject("foo")
115+
.field("type", "integer")
116+
.endObject()
117+
.startObject("foo1")
118+
.field("type", "keyword")
119+
.endObject()
120+
.startObject("contacts")
121+
.field("type", "nested")
122+
.startObject("properties")
123+
.startObject("name")
124+
.field("type", "keyword")
125+
.endObject()
126+
.startObject("age")
127+
.field("type", "integer")
128+
.endObject()
129+
.endObject()
130+
.endObject()
131+
.endObject()
132+
.endObject();
133+
} catch (IOException e) {
134+
throw new IllegalStateException(e);
135+
}
136+
}
137+
138+
private static void addNestedDocuments(String id, int foo, String foo1, String name, int age) throws IOException {
139+
XContentBuilder sourceBuilder = jsonBuilder().startObject()
140+
.field("foo", foo)
141+
.field("foo1", foo1)
142+
.startArray("contacts")
143+
.startObject()
144+
.field("name", name)
145+
.field("age", age)
146+
.endObject()
147+
.endArray()
148+
.endObject();
149+
150+
client().prepareIndex("nested-test-index").setId(id).setSource(sourceBuilder).get();
151+
}
152+
98153
public void testIndexSort() {
99154
SortField dateSort = new SortedNumericSortField("date", SortField.Type.LONG, false);
100155
dateSort.setMissingValue(Long.MAX_VALUE);
@@ -146,4 +201,141 @@ public void testInvalidIndexSort() {
146201
);
147202
assertThat(exc.getMessage(), containsString("docvalues not found for index sort field:[keyword]"));
148203
}
204+
205+
public void testIndexSortOnNestedField() throws IOException {
206+
boolean ascending = randomBoolean();
207+
SortedNumericSelector.Type selector = ascending ? SortedNumericSelector.Type.MIN : SortedNumericSelector.Type.MAX;
208+
SortField regularSort = new SortedNumericSortField("foo", SortField.Type.INT, !ascending, selector);
209+
regularSort.setMissingValue(ascending ? Integer.MAX_VALUE : Integer.MIN_VALUE);
210+
211+
Sort indexSort = new Sort(regularSort);
212+
213+
prepareCreate("nested-test-index").setSettings(
214+
Settings.builder()
215+
.put(indexSettings())
216+
.put("index.number_of_shards", "1")
217+
.put("index.number_of_replicas", "0")
218+
.putList("index.sort.field", "foo")
219+
.putList("index.sort.order", ascending ? "asc" : "desc")
220+
).setMapping(NESTED_TEST_MAPPING).get();
221+
222+
int numDocs = randomIntBetween(10, 30);
223+
List<Integer> fooValues = new ArrayList<>(numDocs);
224+
List<String> ids = new ArrayList<>(numDocs);
225+
226+
for (int i = 0; i < numDocs; i++) {
227+
String id = String.valueOf(i);
228+
int fooValue = randomIntBetween(1, 100);
229+
String name = UUID.randomUUID().toString().replace("-", "").substring(0, 5);
230+
231+
addNestedDocuments(id, fooValue, "", name, fooValue);
232+
fooValues.add(fooValue);
233+
ids.add(id);
234+
}
235+
236+
flushAndRefresh("nested-test-index");
237+
ensureGreen("nested-test-index");
238+
239+
assertSortedSegments("nested-test-index", indexSort);
240+
241+
SearchResponse response = client().prepareSearch("nested-test-index")
242+
.addSort("foo", ascending ? SortOrder.ASC : SortOrder.DESC)
243+
.setQuery(QueryBuilders.matchAllQuery())
244+
.setSize(numDocs)
245+
.get();
246+
247+
assertEquals(numDocs, response.getHits().getTotalHits().value());
248+
249+
Map<Integer, String> valueToId = new HashMap<>();
250+
for (int i = 0; i < numDocs; i++) {
251+
valueToId.put(fooValues.get(i), ids.get(i));
252+
}
253+
254+
List<Integer> sortedValues = new ArrayList<>(fooValues);
255+
if (ascending) {
256+
Collections.sort(sortedValues);
257+
} else {
258+
sortedValues.sort(Collections.reverseOrder());
259+
}
260+
261+
for (int i = 0; i < numDocs; i++) {
262+
int expectedValue = sortedValues.get(i);
263+
assertEquals(expectedValue, response.getHits().getAt(i).getSourceAsMap().get("foo"));
264+
}
265+
}
266+
267+
public void testIndexSortWithNestedField_MultiField() throws IOException {
268+
boolean ascendingPrimary = randomBoolean();
269+
boolean ascendingSecondary = randomBoolean();
270+
prepareCreate("nested-test-index").setSettings(
271+
Settings.builder()
272+
.put(indexSettings())
273+
.put("index.number_of_shards", "1")
274+
.put("index.number_of_replicas", "0")
275+
.putList("index.sort.field", "foo", "foo1")
276+
.putList("index.sort.order", ascendingPrimary ? "asc" : "desc", ascendingSecondary ? "asc" : "desc")
277+
).setMapping(NESTED_TEST_MAPPING).get();
278+
279+
int numDocs = randomIntBetween(10, 30);
280+
List<Tuple<Integer, String>> docValues = new ArrayList<>(numDocs);
281+
List<String> ids = new ArrayList<>(numDocs);
282+
283+
int duplicateValue = randomIntBetween(30, 50);
284+
int numDuplicates = randomIntBetween(3, 5);
285+
286+
for (int i = 0; i < numDocs; i++) {
287+
String id = String.valueOf(i);
288+
int fooValue;
289+
if (i < numDuplicates) {
290+
fooValue = duplicateValue;
291+
} else {
292+
fooValue = randomIntBetween(1, 100);
293+
}
294+
String name = UUID.randomUUID().toString().replace("-", "").substring(0, 5);
295+
addNestedDocuments(id, fooValue, name, name, fooValue);
296+
docValues.add(new Tuple<>(fooValue, name));
297+
ids.add(id);
298+
}
299+
300+
flushAndRefresh("nested-test-index");
301+
ensureGreen("nested-test-index");
302+
SearchResponse response = client().prepareSearch("nested-test-index")
303+
.addSort("foo", ascendingPrimary ? SortOrder.ASC : SortOrder.DESC)
304+
.addSort("foo1", ascendingSecondary ? SortOrder.ASC : SortOrder.DESC)
305+
.setQuery(QueryBuilders.matchAllQuery())
306+
.setSize(numDocs)
307+
.get();
308+
309+
assertEquals(numDocs, response.getHits().getTotalHits().value());
310+
311+
List<Tuple<Integer, String>> sortedValues = new ArrayList<>(docValues);
312+
sortedValues.sort((a, b) -> {
313+
int primaryCompare = ascendingPrimary ? Integer.compare(a.v1(), b.v1()) : Integer.compare(b.v1(), a.v1());
314+
if (primaryCompare != 0) {
315+
return primaryCompare;
316+
}
317+
return ascendingSecondary ? a.v2().compareTo(b.v2()) : b.v2().compareTo(a.v2());
318+
});
319+
320+
for (int i = 0; i < numDocs; i++) {
321+
assertEquals(sortedValues.get(i).v1(), response.getHits().getAt(i).getSourceAsMap().get("foo"));
322+
assertEquals(sortedValues.get(i).v2(), response.getHits().getAt(i).getSourceAsMap().get("foo1"));
323+
}
324+
}
325+
326+
public void testIndexSortWithSortFieldInsideDocBlock() {
327+
IllegalArgumentException exception = expectThrows(
328+
IllegalArgumentException.class,
329+
() -> prepareCreate("nested-sort-test").setSettings(
330+
Settings.builder()
331+
.put(indexSettings())
332+
.put("index.number_of_shards", "1")
333+
.put("index.number_of_replicas", "0")
334+
.putList("index.sort.field", "contacts.age")
335+
.putList("index.sort.order", "desc")
336+
).setMapping(NESTED_TEST_MAPPING).get()
337+
);
338+
339+
assertThat(exception.getMessage(), containsString("index sorting on nested fields is not supported"));
340+
}
149341
}

server/src/main/java/org/opensearch/common/lucene/Lucene.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ public class Lucene {
113113
public static final String LATEST_CODEC = "Lucene101";
114114

115115
public static final String SOFT_DELETES_FIELD = "__soft_deletes";
116+
/**
 * Field name handed to {@code IndexWriterConfig#setParentField} by the internal engine when an
 * index sort is configured and the index was created on or after version 3.2.0.
 * NOTE(review): presumably Lucene uses this field to mark the parent document of each nested
 * block so blocks stay together while sorting -- confirm against the Lucene documentation.
 */
public static final String PARENT_FIELD = "__nested_parent";
116117

117118
public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer());
118119
public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());

server/src/main/java/org/opensearch/index/engine/InternalEngine.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import org.apache.lucene.util.BytesRef;
6767
import org.apache.lucene.util.InfoStream;
6868
import org.opensearch.ExceptionsHelper;
69+
import org.opensearch.Version;
6970
import org.opensearch.action.index.IndexRequest;
7071
import org.opensearch.common.Booleans;
7172
import org.opensearch.common.Nullable;
@@ -2379,6 +2380,9 @@ private IndexWriterConfig getIndexWriterConfig() {
23792380
iwc.setUseCompoundFile(engineConfig.useCompoundFile());
23802381
if (config().getIndexSort() != null) {
23812382
iwc.setIndexSort(config().getIndexSort());
2383+
if (config().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_3_2_0)) {
2384+
iwc.setParentField(Lucene.PARENT_FIELD);
2385+
}
23822386
}
23832387
if (config().getLeafSorter() != null) {
23842388
iwc.setLeafSorter(config().getLeafSorter()); // The default segment search order

0 commit comments

Comments
 (0)