Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.index.mapper.NestedObjectMapper;
import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
import org.elasticsearch.index.search.NestedHelper;
Expand Down Expand Up @@ -260,6 +261,26 @@ protected int doHashCode() {

@Override
protected Query doToQuery(SearchExecutionContext context) throws IOException {
// Delegate to the static toQuery helper, passing this builder's inner query
// (via method reference) as the query provider.
return toQuery((this.query::toQuery), path, scoreMode, ignoreUnmapped, context);
}

/**
* Returns the primitive Lucene query for a nested query given the primitive query to wrap
* @param <E> exception that the queryProvider may throw
* @param queryProvider Retrieves the query to use given the SearchExecutionContext
* @param path nested path
* @param scoreMode score mode to use
* @param ignoreUnmapped whether to ignore unmapped fields
* @param context search execution context
* @return the primitive Lucene query
*/
public static <E extends Exception> Query toQuery(
CheckedFunction<SearchExecutionContext, Query, E> queryProvider,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great CheckedFunction usage!

String path,
ScoreMode scoreMode,
boolean ignoreUnmapped,
SearchExecutionContext context
) throws E {
if (context.allowExpensiveQueries() == false) {
throw new ElasticsearchException(
"[joining] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false."
Expand All @@ -285,7 +306,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {

try {
context.nestedScope().nextLevel(mapper);
innerQuery = this.query.toQuery(context);
innerQuery = queryProvider.apply(context);
} finally {
context.nestedScope().previousLevel();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

package org.elasticsearch.xpack.inference.mapper;

import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
Expand Down Expand Up @@ -352,6 +353,21 @@ public Query termQuery(Object value, SearchExecutionContext context) {
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support term query");
}

/**
 * Builds the Lucene query matching documents that have a value for this field.
 * When no embeddings sub-field exists yet, nothing can match; otherwise the
 * embeddings exists query is wrapped in a nested query over the chunks field.
 */
@Override
public Query existsQuery(SearchExecutionContext context) {
    return getEmbeddingsField() == null
        ? new MatchNoDocsQuery()
        : NestedQueryBuilder.toQuery(
            c -> getEmbeddingsField().fieldType().existsQuery(c),
            getChunksFieldName(name()),
            ScoreMode.None,
            false,
            context
        );
}

@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
// Redirect the fetcher to load the original values of the field
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
Expand Down Expand Up @@ -42,6 +43,7 @@
import org.elasticsearch.index.mapper.SourceToParse;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
import org.elasticsearch.inference.Model;
import org.elasticsearch.inference.SimilarityMeasure;
Expand Down Expand Up @@ -180,36 +182,10 @@ public void testDynamicUpdate() throws IOException {
final String fieldName = "semantic";
final String inferenceId = "test_service";

MapperService mapperService = createMapperService(mapping(b -> {}));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I extracted this as a method to enable updating the mapping to check the actual exists queries created

mapperService.merge(
"_doc",
new CompressedXContent(
Strings.toString(PutMappingRequest.simpleMapping(fieldName, "type=semantic_text,inference_id=" + inferenceId))
),
MapperService.MergeReason.MAPPING_UPDATE
);

SemanticTextField semanticTextField = new SemanticTextField(
MapperService mapperService = mapperServiceForFieldWithModelSettings(
fieldName,
List.of(),
new SemanticTextField.InferenceResult(
inferenceId,
new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null),
List.of()
),
XContentType.JSON
);
XContentBuilder builder = JsonXContent.contentBuilder().startObject();
builder.field(semanticTextField.fieldName());
builder.value(semanticTextField);
builder.endObject();

SourceToParse sourceToParse = new SourceToParse("test", BytesReference.bytes(builder), XContentType.JSON);
ParsedDocument parsedDocument = mapperService.documentMapper().parse(sourceToParse);
mapperService.merge(
"_doc",
parsedDocument.dynamicMappingsUpdate().toCompressedXContent(),
MapperService.MergeReason.MAPPING_UPDATE
inferenceId,
new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null)
);
assertSemanticTextField(mapperService, fieldName, true);
}
Expand Down Expand Up @@ -565,6 +541,51 @@ private MapperService mapperServiceForFieldWithModelSettings(
return mapperService;
}

/**
 * An exists query on a semantic_text field backed by a sparse-embedding model
 * must be rewritten as a block-join query over the nested chunks.
 */
public void testExistsQuerySparseVector() throws IOException {
    final String field = "semantic";
    final String endpointId = "test_service";

    MapperService mappers = mapperServiceForFieldWithModelSettings(
        field,
        endpointId,
        new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null)
    );

    Mapper fieldMapper = mappers.mappingLookup().getMapper(field);
    assertNotNull(fieldMapper);

    SearchExecutionContext context = createSearchExecutionContext(mappers);
    Query query = ((SemanticTextFieldMapper) fieldMapper).fieldType().existsQuery(context);
    assertThat(query, instanceOf(ESToParentBlockJoinQuery.class));
}

/**
 * An exists query on a semantic_text field backed by a dense (text_embedding)
 * model must likewise be rewritten as a block-join query over the nested chunks.
 */
public void testExistsQueryDenseVector() throws IOException {
    final String field = "semantic";
    final String endpointId = "test_service";

    MapperService mappers = mapperServiceForFieldWithModelSettings(
        field,
        endpointId,
        new SemanticTextField.ModelSettings(
            TaskType.TEXT_EMBEDDING,
            1024,
            SimilarityMeasure.COSINE,
            DenseVectorFieldMapper.ElementType.FLOAT
        )
    );

    Mapper fieldMapper = mappers.mappingLookup().getMapper(field);
    assertNotNull(fieldMapper);

    SearchExecutionContext context = createSearchExecutionContext(mappers);
    Query query = ((SemanticTextFieldMapper) fieldMapper).fieldType().existsQuery(context);
    assertThat(query, instanceOf(ESToParentBlockJoinQuery.class));
}

/**
 * Base-class hook asserting the shape of an exists query for this field type.
 * In this mapper's case the field has no indexed value at this point, so the
 * query is expected to match nothing.
 */
@Override
protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
// Until a doc is indexed, the query is rewritten as match no docs
assertThat(query, instanceOf(MatchNoDocsQuery.class));
}

private static void addSemanticTextMapping(XContentBuilder mappingBuilder, String fieldName, String modelId) throws IOException {
mappingBuilder.startObject(fieldName);
mappingBuilder.field("type", SemanticTextFieldMapper.CONTENT_TYPE);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# REST tests for the `exists` query on semantic_text fields.
# Indentation reconstructed to the standard ES YAML REST-test layout — the
# scraped source had leading whitespace stripped; verify against the original file.
setup:
  - requires:
      cluster_features: "gte_v8.15.0"
      reason: semantic_text introduced in 8.15.0

  # Register a sparse-embedding inference endpoint backed by the test service.
  - do:
      inference.put:
        task_type: sparse_embedding
        inference_id: sparse-inference-id
        body: >
          {
            "service": "test_service",
            "service_settings": {
              "model": "my_model",
              "api_key": "abc64"
            },
            "task_settings": {
            }
          }

  # Register a dense (text_embedding) inference endpoint.
  - do:
      inference.put:
        task_type: text_embedding
        inference_id: dense-inference-id
        body: >
          {
            "service": "text_embedding_test_service",
            "service_settings": {
              "model": "my_model",
              "dimensions": 10,
              "api_key": "abc64",
              "similarity": "COSINE"
            },
            "task_settings": {
            }
          }

  # One index per embedding type, each with a single semantic_text field.
  - do:
      indices.create:
        index: test-sparse-index
        body:
          mappings:
            properties:
              inference_field:
                type: semantic_text
                inference_id: sparse-inference-id

  - do:
      indices.create:
        index: test-dense-index
        body:
          mappings:
            properties:
              inference_field:
                type: semantic_text
                inference_id: dense-inference-id

---
"Exists query with no indexed documents":
  # An empty index: exists must match nothing.
  - do:
      search:
        index: test-sparse-index
        body:
          query:
            exists:
              field: "inference_field"

  - match: { hits.total.value: 0 }

---
"Exists query with null indexed documents":
  # An explicit null value does not count as an existing field value.
  - do:
      index:
        index: test-sparse-index
        id: doc
        body:
          inference_field: null
        refresh: true

  - do:
      search:
        index: test-sparse-index
        body:
          query:
            exists:
              field: "inference_field"

  - match: { hits.total.value: 0 }

  - do:
      index:
        index: test-dense-index
        id: doc
        body:
          inference_field: null
        refresh: true

  - do:
      search:
        index: test-dense-index
        body:
          query:
            exists:
              field: "inference_field"

  - match: { hits.total.value: 0 }

---
"Exists query with indexed documents":
  # A real value produces embeddings, so exists matches the document.
  - do:
      index:
        index: test-sparse-index
        id: doc
        body:
          inference_field: "hello world"
        refresh: true

  - do:
      search:
        index: test-sparse-index
        body:
          query:
            exists:
              field: "inference_field"

  - match: { hits.total.value: 1 }

  - do:
      index:
        index: test-dense-index
        id: doc
        body:
          inference_field: "hello world"
        refresh: true

  - do:
      search:
        index: test-dense-index
        body:
          query:
            exists:
              field: "inference_field"

  - match: { hits.total.value: 1 }