Skip to content

Commit c99adc7

Browse files
authored
Inner hits support with hybrid query (#1253)
* Inner Hits in Hybrid query
  Signed-off-by: Varun Jain <[email protected]>
* Inner hits support with hybrid query
  Signed-off-by: Varun Jain <[email protected]>
* Add changelog
  Signed-off-by: Varun Jain <[email protected]>
* fix integ tests
  Signed-off-by: Varun Jain <[email protected]>
* Modify comment
  Signed-off-by: Varun Jain <[email protected]>
* Explain test case
  Signed-off-by: Varun Jain <[email protected]>
* Optimize inner hits count calculation method
  Signed-off-by: Varun Jain <[email protected]>
---------
Signed-off-by: Varun Jain <[email protected]>
1 parent 1b47f0e commit c99adc7

File tree

13 files changed

+1096
-159
lines changed

13 files changed

+1096
-159
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1313
- Optimize embedding generation in Text Embedding Processor ([#1191](https://github.com/opensearch-project/neural-search/pull/1191))
1414
- Optimize embedding generation in Sparse Encoding Processor ([#1246](https://github.com/opensearch-project/neural-search/pull/1246))
1515
- Optimize embedding generation in Text/Image Embedding Processor ([#1249](https://github.com/opensearch-project/neural-search/pull/1249))
16+
- Inner hits support with hybrid query ([#1253](https://github.com/opensearch-project/neural-search/pull/1253))
1617

1718
### Enhancements
1819

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ dependencies {
285285
testFixturesCompileOnly group: 'com.google.guava', name: 'guava', version:'32.1.3-jre'
286286
testFixturesImplementation fileTree(dir: knnJarDirectory, include: ["opensearch-knn-${opensearch_build}.jar", "remote-index-build-client-${opensearch_build}.jar"])
287287
testImplementation fileTree(dir: knnJarDirectory, include: ["opensearch-knn-${opensearch_build}.jar", "remote-index-build-client-${opensearch_build}.jar"])
288+
testImplementation "org.opensearch.plugin:parent-join-client:${opensearch_version}"
288289
}
289290

290291
// In order to add the jar to the classpath, we need to unzip the

src/main/java/org/opensearch/neuralsearch/query/HybridQueryBuilder.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import java.util.List;
1111
import java.util.ListIterator;
1212
import java.util.Locale;
13+
import java.util.Map;
1314
import java.util.Objects;
1415
import java.util.stream.Collectors;
1516

@@ -25,6 +26,7 @@
2526
import org.opensearch.core.xcontent.XContentParser;
2627
import org.opensearch.index.IndexSettings;
2728
import org.opensearch.index.query.AbstractQueryBuilder;
29+
import org.opensearch.index.query.InnerHitContextBuilder;
2830
import org.opensearch.index.query.QueryBuilder;
2931
import org.opensearch.index.query.QueryRewriteContext;
3032
import org.opensearch.index.query.QueryShardContext;
@@ -392,4 +394,19 @@ public void visit(QueryBuilderVisitor visitor) {
392394
subQueryBuilder.visit(subVisitor);
393395
}
394396
}
397+
398+
/**
399+
* Extracts the inner hits from the hybrid query tree structure.
400+
* While it extracts inner hits, child inner hits are inlined into the inner hit builder they belong to.
401+
* This implementation handles inner hits for all sub-queries within the hybrid query: every entry of
* {@code queries} is handed to {@code InnerHitContextBuilder.extractInnerHits} together with the same
* {@code innerHits} map, so all sub-queries contribute to one shared result.
402+
*
403+
* @param innerHits the map to collect inner hit contexts, where the key is the inner hit name
404+
* and the value is the corresponding inner hit context builder
* @throws IllegalArgumentException when two sub-queries contribute inner hits under the same key, as
* exercised by the same-path case in {@code HybridQueryBuilderTests}
405+
*/
406+
@Override
407+
protected void extractInnerHitBuilders(Map<String, InnerHitContextBuilder> innerHits) {
408+
for (QueryBuilder queryBuilder : queries) {
409+
// The map is shared across iterations, so each sub-query adds its entries to the same result.
InnerHitContextBuilder.extractInnerHits(queryBuilder, innerHits);
410+
}
411+
}
395412
}

src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java

Lines changed: 51 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ private void prepareResourcesForNestegAggregationsScenario(String index) throws
482482
index,
483483
buildIndexConfiguration(
484484
List.of(new KNNFieldConfig("location", 2, TEST_SPACE_TYPE)),
485+
Map.of(),
485486
List.of(),
486487
List.of(),
487488
List.of(FLOAT_FIELD_NAME_IMDB),
@@ -701,105 +702,123 @@ private void initializeIndexIfNotExist(String indexName) throws IOException {
701702
&& !indexExists(TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS)) {
702703
createIndexWithConfiguration(
703704
indexName,
704-
buildIndexConfiguration(List.of(), List.of(), List.of(INTEGER_FIELD_1), List.of(KEYWORD_FIELD_1), List.of(DATE_FIELD_1), 3),
705+
buildIndexConfiguration(List.of(), Map.of(), List.of(INTEGER_FIELD_1), List.of(KEYWORD_FIELD_1), List.of(DATE_FIELD_1), 3),
705706
""
706707
);
707708

708-
addKnnDoc(
709+
indexTheDocument(
709710
indexName,
710711
"1",
711712
List.of(),
712713
List.of(),
713714
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
714715
Collections.singletonList(TEST_DOC_TEXT1),
715716
List.of(),
716-
List.of(),
717+
Map.of(),
717718
List.of(INTEGER_FIELD_1, INTEGER_FIELD_PRICE),
718719
List.of(INTEGER_FIELD_1_VALUE, INTEGER_FIELD_PRICE_1_VALUE),
719720
List.of(KEYWORD_FIELD_1),
720721
List.of(KEYWORD_FIELD_1_VALUE),
721722
List.of(DATE_FIELD_1),
722-
List.of(DATE_FIELD_1_VALUE)
723+
List.of(DATE_FIELD_1_VALUE),
724+
List.of(),
725+
List.of(),
726+
null
723727
);
724-
addKnnDoc(
728+
indexTheDocument(
725729
indexName,
726730
"2",
727731
List.of(),
728732
List.of(),
729733
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
730734
Collections.singletonList(TEST_DOC_TEXT3),
731735
List.of(),
732-
List.of(),
736+
Map.of(),
733737
List.of(INTEGER_FIELD_1, INTEGER_FIELD_PRICE),
734738
List.of(INTEGER_FIELD_2_VALUE, INTEGER_FIELD_PRICE_2_VALUE),
735739
List.of(),
736740
List.of(),
737741
List.of(DATE_FIELD_1),
738-
List.of(DATE_FIELD_2_VALUE)
742+
List.of(DATE_FIELD_2_VALUE),
743+
List.of(),
744+
List.of(),
745+
null
739746
);
740-
addKnnDoc(
747+
indexTheDocument(
741748
indexName,
742749
"3",
743750
List.of(),
744751
List.of(),
745752
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
746753
Collections.singletonList(TEST_DOC_TEXT2),
747754
List.of(),
748-
List.of(),
755+
Map.of(),
749756
List.of(INTEGER_FIELD_PRICE),
750757
List.of(INTEGER_FIELD_PRICE_3_VALUE),
751758
List.of(KEYWORD_FIELD_1),
752759
List.of(KEYWORD_FIELD_2_VALUE),
753760
List.of(DATE_FIELD_1),
754-
List.of(DATE_FIELD_3_VALUE)
761+
List.of(DATE_FIELD_3_VALUE),
762+
List.of(),
763+
List.of(),
764+
null
755765
);
756-
addKnnDoc(
766+
indexTheDocument(
757767
indexName,
758768
"4",
759769
List.of(),
760770
List.of(),
761771
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
762772
Collections.singletonList(TEST_DOC_TEXT4),
763773
List.of(),
764-
List.of(),
774+
Map.of(),
765775
List.of(INTEGER_FIELD_1, INTEGER_FIELD_PRICE),
766776
List.of(INTEGER_FIELD_3_VALUE, INTEGER_FIELD_PRICE_4_VALUE),
767777
List.of(KEYWORD_FIELD_1),
768778
List.of(KEYWORD_FIELD_3_VALUE),
769779
List.of(DATE_FIELD_1),
770-
List.of(DATE_FIELD_2_VALUE)
780+
List.of(DATE_FIELD_2_VALUE),
781+
List.of(),
782+
List.of(),
783+
null
771784
);
772-
addKnnDoc(
785+
indexTheDocument(
773786
indexName,
774787
"5",
775788
List.of(),
776789
List.of(),
777790
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
778791
Collections.singletonList(TEST_DOC_TEXT5),
779792
List.of(),
780-
List.of(),
793+
Map.of(),
781794
List.of(INTEGER_FIELD_1, INTEGER_FIELD_PRICE),
782795
List.of(INTEGER_FIELD_3_VALUE, INTEGER_FIELD_PRICE_5_VALUE),
783796
List.of(KEYWORD_FIELD_1),
784797
List.of(KEYWORD_FIELD_4_VALUE),
785798
List.of(DATE_FIELD_1),
786-
List.of(DATE_FIELD_4_VALUE)
799+
List.of(DATE_FIELD_4_VALUE),
800+
List.of(),
801+
List.of(),
802+
null
787803
);
788-
addKnnDoc(
804+
indexTheDocument(
789805
indexName,
790806
"6",
791807
List.of(),
792808
List.of(),
793809
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
794810
Collections.singletonList(TEST_DOC_TEXT6),
795811
List.of(),
796-
List.of(),
812+
Map.of(),
797813
List.of(INTEGER_FIELD_1, INTEGER_FIELD_PRICE),
798814
List.of(INTEGER_FIELD_4_VALUE, INTEGER_FIELD_PRICE_6_VALUE),
799815
List.of(KEYWORD_FIELD_1),
800816
List.of(KEYWORD_FIELD_4_VALUE),
801817
List.of(DATE_FIELD_1),
802-
List.of(DATE_FIELD_4_VALUE)
818+
List.of(DATE_FIELD_4_VALUE),
819+
List.of(),
820+
List.of(),
821+
null
803822
);
804823
}
805824
}
@@ -809,42 +828,48 @@ private void initializeIndexWithOneShardIfNotExists(String indexName) {
809828
if (!indexExists(indexName)) {
810829
createIndexWithConfiguration(
811830
indexName,
812-
buildIndexConfiguration(List.of(), List.of(), List.of(INTEGER_FIELD_1), List.of(KEYWORD_FIELD_1), List.of(), 1),
831+
buildIndexConfiguration(List.of(), Map.of(), List.of(INTEGER_FIELD_1), List.of(KEYWORD_FIELD_1), List.of(), 1),
813832
""
814833
);
815834

816-
addKnnDoc(
835+
indexTheDocument(
817836
indexName,
818837
"1",
819838
List.of(),
820839
List.of(),
821840
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
822841
Collections.singletonList(TEST_DOC_TEXT1),
823842
List.of(),
824-
List.of(),
843+
Map.of(),
825844
List.of(INTEGER_FIELD_1),
826845
List.of(INTEGER_FIELD_1_VALUE),
827846
List.of(),
828847
List.of(),
829848
List.of(),
830-
List.of()
849+
List.of(),
850+
List.of(),
851+
List.of(),
852+
null
831853
);
832854

833-
addKnnDoc(
855+
indexTheDocument(
834856
indexName,
835857
"2",
836858
List.of(),
837859
List.of(),
838860
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
839861
Collections.singletonList(TEST_DOC_TEXT3),
840862
List.of(),
841-
List.of(),
863+
Map.of(),
842864
List.of(INTEGER_FIELD_1),
843865
List.of(INTEGER_FIELD_2_VALUE),
844866
List.of(),
845867
List.of(),
846868
List.of(),
847-
List.of()
869+
List.of(),
870+
List.of(),
871+
List.of(),
872+
null
848873
);
849874
}
850875
}

src/test/java/org/opensearch/neuralsearch/query/HybridQueryBuilderTests.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import static org.opensearch.neuralsearch.query.NeuralQueryBuilder.MODEL_ID_FIELD;
1919
import static org.opensearch.neuralsearch.query.NeuralQueryBuilder.QUERY_TEXT_FIELD;
2020

21+
import java.util.HashMap;
2122
import java.util.HashSet;
2223
import java.util.Iterator;
2324
import java.util.List;
@@ -31,6 +32,7 @@
3132
import org.apache.lucene.search.MatchNoDocsQuery;
3233
import org.apache.lucene.search.Query;
3334
import org.apache.lucene.search.TermQuery;
35+
import org.apache.lucene.search.join.ScoreMode;
3436
import org.mockito.Mock;
3537
import org.mockito.MockitoAnnotations;
3638
import org.opensearch.Version;
@@ -56,8 +58,12 @@
5658
import org.opensearch.index.IndexSettings;
5759
import org.opensearch.index.mapper.MappedFieldType;
5860
import org.opensearch.index.mapper.TextFieldMapper;
61+
import org.opensearch.index.query.InnerHitBuilder;
62+
import org.opensearch.index.query.InnerHitContextBuilder;
5963
import org.opensearch.index.query.BoolQueryBuilder;
6064
import org.opensearch.index.query.MatchAllQueryBuilder;
65+
import org.opensearch.index.query.MatchQueryBuilder;
66+
import org.opensearch.index.query.NestedQueryBuilder;
6167
import org.opensearch.index.query.QueryBuilder;
6268
import org.opensearch.index.query.QueryBuilders;
6369
import org.opensearch.index.query.QueryShardContext;
@@ -1096,6 +1102,48 @@ public void testFilter() {
10961102
assertEquals(new MatchAllQueryBuilder(), updatedNeuralSparseQueryBuilder.filter().get(0));
10971103
}
10981104

1105+
/**
* Verifies that only sub-queries which declare inner hits contribute an entry. Of the two nested
* sub-queries built here, only the first has an {@code InnerHitBuilder} attached, so exactly one
* entry is expected and its key must equal that query's nested path ("path1").
*/
public void testExtractInnerHitsBuilders() {
1106+
NestedQueryBuilder nestedQueryBuilder1 = new NestedQueryBuilder(
1107+
"path1",
1108+
new MatchQueryBuilder("testFieldName1", "testValue1"),
1109+
ScoreMode.Max
1110+
);
1111+
// Inner hits are requested for the first nested sub-query only; no name is set on the builder.
nestedQueryBuilder1.innerHit(new InnerHitBuilder());
1112+
// The second nested sub-query deliberately has no inner hit builder attached.
NestedQueryBuilder nestedQueryBuilder2 = new NestedQueryBuilder(
1113+
"path2",
1114+
new MatchQueryBuilder("testFieldName2", "testValue2"),
1115+
ScoreMode.Max
1116+
);
1117+
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder().add(nestedQueryBuilder1).add(nestedQueryBuilder2);
1118+
Map<String, InnerHitContextBuilder> innerHitsMap = new HashMap<>();
1119+
hybridQueryBuilder.extractInnerHitBuilders(innerHitsMap);
1120+
// The single collected entry is expected under the nested path of the first sub-query.
assertEquals("path1", innerHitsMap.keySet().iterator().next());
1121+
assertEquals(1, innerHitsMap.size());
1122+
}
1123+
1124+
/**
* Verifies that extraction is rejected when two sub-queries register inner hits under the same key.
* Both nested sub-queries target "path1" and share one unnamed {@code InnerHitBuilder} instance; the
* call is expected to fail with an {@link IllegalArgumentException} that names the duplicate key.
*/
public void testExtractInnerHitsBuilders_whenMultipleInnerHitsOnSamePath_thenFail() {
1125+
// One builder instance, with no name set, is attached to both nested sub-queries below.
InnerHitBuilder innerHitBuilder = new InnerHitBuilder();
1126+
NestedQueryBuilder nestedQueryBuilder1 = new NestedQueryBuilder(
1127+
"path1",
1128+
new MatchQueryBuilder("testFieldName1", "testValue1"),
1129+
ScoreMode.Max
1130+
);
1131+
nestedQueryBuilder1.innerHit(innerHitBuilder);
1132+
// Same nested path as the first sub-query; only the matched value differs.
NestedQueryBuilder nestedQueryBuilder2 = new NestedQueryBuilder(
1133+
"path1",
1134+
new MatchQueryBuilder("testFieldName1", "testValue2"),
1135+
ScoreMode.Max
1136+
);
1137+
nestedQueryBuilder2.innerHit(innerHitBuilder);
1138+
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder().add(nestedQueryBuilder1).add(nestedQueryBuilder2);
1139+
Map<String, InnerHitContextBuilder> innerHitsMap = new HashMap<>();
1140+
// Extraction must throw rather than silently overwrite the entry already stored for "path1".
IllegalArgumentException e = expectThrows(
1141+
IllegalArgumentException.class,
1142+
() -> hybridQueryBuilder.extractInnerHitBuilders(innerHitsMap)
1143+
);
1144+
assertEquals("[inner_hits] already contains an entry for key [path1]", e.getMessage());
1145+
}
1146+
10991147
private Map<String, Object> getInnerMap(Object innerObject, String queryName, String fieldName) {
11001148
if (!(innerObject instanceof Map)) {
11011149
fail("field name does not map to nested object");

src/test/java/org/opensearch/neuralsearch/query/HybridQueryExplainIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@ private void initializeIndexIfNotExist(String indexName) {
858858
indexName,
859859
buildIndexConfiguration(
860860
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)),
861-
List.of(TEST_NESTED_TYPE_FIELD_NAME_1),
861+
Map.of(TEST_NESTED_TYPE_FIELD_NAME_1, Map.of()),
862862
1
863863
),
864864
""
@@ -871,7 +871,7 @@ private void initializeIndexIfNotExist(String indexName) {
871871
indexName,
872872
buildIndexConfiguration(
873873
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)),
874-
List.of(),
874+
Map.of(),
875875
1
876876
),
877877
""

0 commit comments

Comments
 (0)