Skip to content

Commit d14fe2d

Browse files
Merge branch 'main' into inner-hits-top-hits-fetch
Signed-off-by: Andre van de Ven <[email protected]>
2 parents bdceef0 + c5d26f7 commit d14fe2d

File tree

122 files changed

+10049
-248
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

122 files changed

+10049
-248
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
.claude
2+
CLAUDE.md
3+
.cursor*
14

25
# intellij files
36
.idea/
@@ -64,4 +67,4 @@ testfixtures_shared/
6467
.ci/jobs/
6568

6669
# build files generated
67-
doc-tools/missing-doclet/bin/
70+
doc-tools/missing-doclet/bin/

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
4646
- Expand fetch phase profiling to multi-shard queries ([#18887](https://github.com/opensearch-project/OpenSearch/pull/18887))
4747
- Prevent shard initialization failure due to streaming consumer errors ([#18877](https://github.com/opensearch-project/OpenSearch/pull/18877))
4848
- APIs for stream transport and new stream-based search api action ([#18722](https://github.com/opensearch-project/OpenSearch/pull/18722))
49+
- Add support for custom remote store segment path prefix to support clusterless configurations ([#18750](https://github.com/opensearch-project/OpenSearch/issues/18750))
4950
- Added the core process for warming merged segments in remote-store enabled domains ([#18683](https://github.com/opensearch-project/OpenSearch/pull/18683))
51+
- Streaming aggregation ([#18874](https://github.com/opensearch-project/OpenSearch/pull/18874))
5052
- Optimize Composite Aggregations by removing unnecessary object allocations ([#18531](https://github.com/opensearch-project/OpenSearch/pull/18531))
5153
- [Star-Tree] Add search support for ip field type ([#18671](https://github.com/opensearch-project/OpenSearch/pull/18671))
5254
- Expand fetch phase profiling to support inner hits and top hits aggregation phases ([##18936](https://github.com/opensearch-project/OpenSearch/pull/18936))
55+
- [Derived Source] Add integration of derived source feature across various paths like get/search/recovery ([#18565](https://github.com/opensearch-project/OpenSearch/pull/18565))
56+
- Supporting Scripted Metric Aggregation when reducing aggregations in InternalValueCount and InternalAvg ([18411](https://github.com/opensearch-project/OpenSearch/pull18411)))
5357

5458
### Changed
5559
- Update Subject interface to use CheckedRunnable ([#18570](https://github.com/opensearch-project/OpenSearch/issues/18570))
@@ -85,6 +89,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
8589
- Bump `com.nimbusds:nimbus-jose-jwt` from 10.2 to 10.4 ([#18759](https://github.com/opensearch-project/OpenSearch/pull/18759), [#18804](https://github.com/opensearch-project/OpenSearch/pull/18804))
8690
- Bump `commons-beanutils:commons-beanutils` from 1.9.4 to 1.11.0 ([#18401](https://github.com/opensearch-project/OpenSearch/issues/18401))
8791
- Bump `org.xerial.snappy:snappy-java` from 1.1.10.7 to 1.1.10.8 ([#18803](https://github.com/opensearch-project/OpenSearch/pull/18803))
92+
- Bump `org.ajoberstar.grgit:grgit-core` from 5.2.1 to 5.3.2 ([#18935](https://github.com/opensearch-project/OpenSearch/pull/18935))
93+
- Bump `org.apache.kafka:kafka-clients` from 3.8.1 to 3.9.1 ([#18935](https://github.com/opensearch-project/OpenSearch/pull/18935))
8894

8995
### Deprecated
9096

buildSrc/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ dependencies {
122122
api 'org.jruby.jcodings:jcodings:1.0.58'
123123
api 'org.jruby.joni:joni:2.2.3'
124124
api "com.fasterxml.jackson.core:jackson-databind:${props.getProperty('jackson_databind')}"
125-
api "org.ajoberstar.grgit:grgit-core:5.2.1"
125+
api "org.ajoberstar.grgit:grgit-core:5.3.2"
126126

127127
testFixturesApi "junit:junit:${props.getProperty('junit')}"
128128
testFixturesApi "com.carrotsearch.randomizedtesting:randomizedtesting-runner:${props.getProperty('randomizedrunner')}"

modules/reindex/src/test/java/org/opensearch/index/reindex/ReindexBasicTests.java

Lines changed: 306 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,14 @@
3232

3333
package org.opensearch.index.reindex;
3434

35+
import org.opensearch.action.bulk.BulkRequestBuilder;
36+
import org.opensearch.action.bulk.BulkResponse;
3537
import org.opensearch.action.index.IndexRequestBuilder;
38+
import org.opensearch.action.search.SearchResponse;
39+
import org.opensearch.common.settings.Settings;
40+
import org.opensearch.common.xcontent.XContentType;
41+
import org.opensearch.search.SearchHit;
42+
import org.opensearch.search.sort.SortOrder;
3643

3744
import java.util.ArrayList;
3845
import java.util.Collection;
@@ -41,7 +48,9 @@
4148
import java.util.Map;
4249
import java.util.stream.Collectors;
4350

51+
import static org.opensearch.index.query.QueryBuilders.matchAllQuery;
4452
import static org.opensearch.index.query.QueryBuilders.termQuery;
53+
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
4554
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;
4655
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
4756
import static org.hamcrest.Matchers.hasSize;
@@ -177,4 +186,301 @@ public void testMissingSources() {
177186
assertThat(response, matcher().created(0).slices(hasSize(0)));
178187
}
179188

189+
public void testReindexWithDerivedSource() throws Exception {
190+
// Create source index with derived source setting enabled
191+
String sourceIndexMapping = """
192+
{
193+
"settings": {
194+
"index": {
195+
"number_of_shards": 1,
196+
"number_of_replicas": 0,
197+
"derived_source": {
198+
"enabled": true
199+
}
200+
}
201+
},
202+
"mappings": {
203+
"_doc": {
204+
"properties": {
205+
"foo": {
206+
"type": "keyword",
207+
"store": true
208+
},
209+
"bar": {
210+
"type": "integer",
211+
"store": true
212+
}
213+
}
214+
}
215+
}
216+
}""";
217+
218+
// Create indices
219+
assertAcked(prepareCreate("source_index").setSource(sourceIndexMapping, XContentType.JSON));
220+
assertAcked(prepareCreate("dest_index").setSource(sourceIndexMapping, XContentType.JSON));
221+
ensureGreen();
222+
223+
// Index some documents
224+
int numDocs = randomIntBetween(5, 20);
225+
List<IndexRequestBuilder> docs = new ArrayList<>();
226+
for (int i = 0; i < numDocs; i++) {
227+
docs.add(client().prepareIndex("source_index").setId(Integer.toString(i)).setSource("foo", "value_" + i, "bar", i));
228+
}
229+
indexRandom(true, docs);
230+
231+
// Test 1: Basic reindex
232+
ReindexRequestBuilder copy = reindex().source("source_index").destination("dest_index").refresh(true);
233+
234+
BulkByScrollResponse response = copy.get();
235+
assertThat(response, matcher().created(numDocs));
236+
long expectedCount = client().prepareSearch("dest_index").setQuery(matchAllQuery()).get().getHits().getTotalHits().value();
237+
assertEquals(numDocs, expectedCount);
238+
239+
// Test 2: Reindex with query filter
240+
String destIndexFiltered = "dest_index_filtered";
241+
assertAcked(prepareCreate(destIndexFiltered).setSource(sourceIndexMapping, XContentType.JSON));
242+
243+
copy = reindex().source("source_index").destination(destIndexFiltered).filter(termQuery("bar", 1)).refresh(true);
244+
245+
response = copy.get();
246+
expectedCount = client().prepareSearch("source_index").setQuery(termQuery("bar", 1)).get().getHits().getTotalHits().value();
247+
assertThat(response, matcher().created(expectedCount));
248+
249+
// Test 3: Reindex with slices
250+
String destIndexSliced = "dest_index_sliced";
251+
assertAcked(prepareCreate(destIndexSliced).setSource(sourceIndexMapping, XContentType.JSON));
252+
253+
int slices = randomSlices();
254+
int expectedSlices = expectedSliceStatuses(slices, "source_index");
255+
256+
copy = reindex().source("source_index").destination(destIndexSliced).setSlices(slices).refresh(true);
257+
258+
response = copy.get();
259+
assertThat(response, matcher().created(numDocs).slices(hasSize(expectedSlices)));
260+
261+
// Test 4: Reindex with maxDocs
262+
String destIndexMaxDocs = "dest_index_maxdocs";
263+
assertAcked(prepareCreate(destIndexMaxDocs).setSource(sourceIndexMapping, XContentType.JSON));
264+
265+
int maxDocs = numDocs / 2;
266+
copy = reindex().source("source_index").destination(destIndexMaxDocs).maxDocs(maxDocs).refresh(true);
267+
268+
response = copy.get();
269+
assertThat(response, matcher().created(maxDocs));
270+
expectedCount = client().prepareSearch(destIndexMaxDocs).setQuery(matchAllQuery()).get().getHits().getTotalHits().value();
271+
assertEquals(maxDocs, expectedCount);
272+
273+
// Test 5: Multiple source indices
274+
String sourceIndex2 = "source_index_2";
275+
assertAcked(prepareCreate(sourceIndex2).setSource(sourceIndexMapping, XContentType.JSON));
276+
277+
int numDocs2 = randomIntBetween(5, 20);
278+
List<IndexRequestBuilder> docs2 = new ArrayList<>();
279+
for (int i = 0; i < numDocs2; i++) {
280+
docs2.add(
281+
client().prepareIndex(sourceIndex2).setId(Integer.toString(i + numDocs)).setSource("foo", "value2_" + i, "bar", i + numDocs)
282+
);
283+
}
284+
indexRandom(true, docs2);
285+
286+
String destIndexMulti = "dest_index_multi";
287+
assertAcked(prepareCreate(destIndexMulti).setSource(sourceIndexMapping, XContentType.JSON));
288+
289+
copy = reindex().source("source_index", "source_index_2").destination(destIndexMulti).refresh(true);
290+
291+
response = copy.get();
292+
assertThat(response, matcher().created(numDocs + numDocs2));
293+
expectedCount = client().prepareSearch(destIndexMulti).setQuery(matchAllQuery()).get().getHits().getTotalHits().value();
294+
assertEquals(numDocs + numDocs2, expectedCount);
295+
}
296+
297+
public void testReindexFromDerivedSourceToNormalIndex() throws Exception {
298+
// Create source index with derived source enabled
299+
String sourceMapping = """
300+
{
301+
"properties": {
302+
"text_field": {
303+
"type": "text",
304+
"store": true
305+
},
306+
"keyword_field": {
307+
"type": "keyword"
308+
},
309+
"numeric_field": {
310+
"type": "long",
311+
"doc_values": true
312+
},
313+
"date_field": {
314+
"type": "date",
315+
"store": true
316+
}
317+
}
318+
}""";
319+
320+
// Create destination index with normal settings
321+
String destMapping = """
322+
{
323+
"properties": {
324+
"text_field": {
325+
"type": "text"
326+
},
327+
"keyword_field": {
328+
"type": "keyword"
329+
},
330+
"numeric_field": {
331+
"type": "long"
332+
},
333+
"date_field": {
334+
"type": "date"
335+
}
336+
}
337+
}""";
338+
339+
// Create source index
340+
assertAcked(
341+
prepareCreate("source_index").setSettings(
342+
Settings.builder().put("index.number_of_shards", 2).put("index.derived_source.enabled", true)
343+
).setMapping(sourceMapping)
344+
);
345+
346+
// Create destination index
347+
assertAcked(prepareCreate("dest_index").setMapping(destMapping));
348+
349+
// Index test documents
350+
int numDocs = randomIntBetween(100, 200);
351+
final List<IndexRequestBuilder> docs = new ArrayList<>();
352+
for (int i = 0; i < numDocs; i++) {
353+
docs.add(
354+
client().prepareIndex("source_index")
355+
.setId(Integer.toString(i))
356+
.setSource(
357+
"text_field",
358+
"text value " + i,
359+
"keyword_field",
360+
"key_" + i,
361+
"numeric_field",
362+
i,
363+
"date_field",
364+
System.currentTimeMillis()
365+
)
366+
);
367+
}
368+
indexRandom(true, docs);
369+
refresh("source_index");
370+
371+
// Test 1: Basic reindex without slices
372+
ReindexRequestBuilder reindex = reindex().source("source_index").destination("dest_index").refresh(true);
373+
BulkByScrollResponse response = reindex.get();
374+
assertThat(response, matcher().created(numDocs));
375+
verifyReindexedContent("dest_index", numDocs);
376+
377+
// Test 2: Reindex with query filter
378+
String destFilteredIndex = "dest_filtered_index";
379+
assertAcked(prepareCreate(destFilteredIndex).setMapping(destMapping));
380+
reindex = reindex().source("source_index").destination(destFilteredIndex).filter(termQuery("keyword_field", "key_1")).refresh(true);
381+
response = reindex.get();
382+
assertThat(response, matcher().created(1));
383+
verifyReindexedContent(destFilteredIndex, 1);
384+
385+
// Test 3: Reindex with slices
386+
String destSlicedIndex = "dest_sliced_index";
387+
assertAcked(prepareCreate(destSlicedIndex).setMapping(destMapping));
388+
int slices = randomSlices();
389+
int expectedSlices = expectedSliceStatuses(slices, "source_index");
390+
391+
reindex = reindex().source("source_index").destination(destSlicedIndex).setSlices(slices).refresh(true);
392+
response = reindex.get();
393+
assertThat(response, matcher().created(numDocs).slices(hasSize(expectedSlices)));
394+
verifyReindexedContent(destSlicedIndex, numDocs);
395+
396+
// Test 4: Reindex with field transformation
397+
String destTransformedIndex = "dest_transformed_index";
398+
String transformedMapping = """
399+
{
400+
"properties": {
401+
"new_text_field": {
402+
"type": "text"
403+
},
404+
"new_keyword_field": {
405+
"type": "keyword"
406+
},
407+
"modified_numeric": {
408+
"type": "long"
409+
},
410+
"date_field": {
411+
"type": "date"
412+
}
413+
}
414+
}""";
415+
assertAcked(prepareCreate(destTransformedIndex).setMapping(transformedMapping));
416+
417+
// First reindex the documents
418+
reindex = reindex().source("source_index").destination(destTransformedIndex).refresh(true);
419+
response = reindex.get();
420+
assertThat(response, matcher().created(numDocs));
421+
422+
// Then transform using bulk update
423+
BulkRequestBuilder bulkRequest = client().prepareBulk();
424+
SearchResponse searchResponse = client().prepareSearch(destTransformedIndex).setQuery(matchAllQuery()).setSize(numDocs).get();
425+
426+
for (SearchHit hit : searchResponse.getHits()) {
427+
Map<String, Object> source = hit.getSourceAsMap();
428+
Map<String, Object> newSource = new HashMap<>();
429+
430+
// Transform fields
431+
newSource.put("new_text_field", source.get("text_field"));
432+
newSource.put("new_keyword_field", source.get("keyword_field"));
433+
newSource.put("modified_numeric", ((Number) source.get("numeric_field")).longValue() + 1000);
434+
newSource.put("date_field", source.get("date_field"));
435+
436+
bulkRequest.add(client().prepareIndex(destTransformedIndex).setId(hit.getId()).setSource(newSource));
437+
}
438+
439+
BulkResponse bulkResponse = bulkRequest.get();
440+
assertFalse(bulkResponse.hasFailures());
441+
refresh(destTransformedIndex);
442+
verifyTransformedContent(destTransformedIndex, numDocs);
443+
}
444+
445+
private void verifyReindexedContent(String indexName, int expectedCount) {
446+
refresh(indexName);
447+
SearchResponse searchResponse = client().prepareSearch(indexName)
448+
.setQuery(matchAllQuery())
449+
.setSize(expectedCount)
450+
.addSort("numeric_field", SortOrder.ASC)
451+
.get();
452+
453+
assertHitCount(searchResponse, expectedCount);
454+
455+
for (SearchHit hit : searchResponse.getHits()) {
456+
Map<String, Object> source = hit.getSourceAsMap();
457+
int id = Integer.parseInt(hit.getId());
458+
459+
assertEquals("text value " + id, source.get("text_field"));
460+
assertEquals("key_" + id, source.get("keyword_field"));
461+
assertEquals(id, ((Number) source.get("numeric_field")).intValue());
462+
assertNotNull(source.get("date_field"));
463+
}
464+
}
465+
466+
private void verifyTransformedContent(String indexName, int expectedCount) {
467+
refresh(indexName);
468+
SearchResponse searchResponse = client().prepareSearch(indexName)
469+
.setQuery(matchAllQuery())
470+
.setSize(expectedCount)
471+
.addSort("modified_numeric", SortOrder.ASC)
472+
.get();
473+
474+
assertHitCount(searchResponse, expectedCount);
475+
476+
for (SearchHit hit : searchResponse.getHits()) {
477+
Map<String, Object> source = hit.getSourceAsMap();
478+
int id = Integer.parseInt(hit.getId());
479+
480+
assertEquals("text value " + id, source.get("new_text_field"));
481+
assertEquals("key_" + id, source.get("new_keyword_field"));
482+
assertEquals(id + 1000, ((Number) source.get("modified_numeric")).longValue());
483+
assertNotNull(source.get("date_field"));
484+
}
485+
}
180486
}

0 commit comments

Comments
 (0)