diff --git a/docs/changelog/128735.yaml b/docs/changelog/128735.yaml new file mode 100644 index 0000000000000..33ea2e4e97d91 --- /dev/null +++ b/docs/changelog/128735.yaml @@ -0,0 +1,5 @@ +pr: 128735 +summary: Add option to include or exclude vectors from `_source` retrieval +area: Vector Search +type: feature +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/230_source_exclude_vectors.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/230_source_exclude_vectors.yml new file mode 100644 index 0000000000000..f8c3835ac6b31 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/230_source_exclude_vectors.yml @@ -0,0 +1,225 @@ +setup: + - requires: + reason: 'exclude_vectors option is required' + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_search + capabilities: [ exclude_vectors_param ] + - skip: + features: "headers" + + - do: + indices.create: + index: test + body: + mappings: + properties: + name: + type: keyword + sparse_vector: + type: sparse_vector + vector: + type: dense_vector + dims: 5 + similarity: l2_norm + + nested: + type: nested + properties: + paragraph_id: + type: keyword + vector: + type: dense_vector + dims: 5 + similarity: l2_norm + sparse_vector: + type: sparse_vector + + - do: + index: + index: test + id: "1" + body: + name: cow.jpg + vector: [36, 267, -311, 12, -202] + + - do: + index: + index: test + id: "2" + body: + name: moose.jpg + nested: + - paragraph_id: 0 + vector: [-0.5, 100.0, -13, 14.8, -156.0] + - paragraph_id: 2 + vector: [0, 100.0, 0, 14.8, -156.0] + - paragraph_id: 3 + vector: [0, 1.0, 0, 1.8, -15.0] + + - do: + index: + index: test + id: "3" + body: + name: rabbit.jpg + vector: [-0.5, 100.0, -13, 14.8, -156.0] + sparse_vector: + running: 3 + good: 17 + run: 22 + + - do: + index: + index: test + id: "4" + body: + name: zoolander.jpg + nested: + - paragraph_id: 0 + vector: [ 
-0.5, 100.0, -13, 14.8, -156.0 ] + sparse_vector: + running: 3 + good: 17 + run: 22 + - paragraph_id: 1 + sparse_vector: + modeling: 32 + model: 20 + mode: 54 + - paragraph_id: 2 + vector: [ -9.8, 109, 32, 14.8, 23 ] + + + - do: + indices.refresh: {} + +--- +"exclude vectors": + - do: + search: + index: test + body: + _source: + exclude_vectors: true + sort: ["name"] + + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.0._source.name: "cow.jpg"} + - not_exists: hits.hits.0._source.vector + + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.1._source.name: "moose.jpg"} + - length: { hits.hits.1._source.nested: 3 } + - not_exists: hits.hits.1._source.nested.0.vector + - match: { hits.hits.1._source.nested.0.paragraph_id: 0 } + - not_exists: hits.hits.1._source.nested.1.vector + - match: { hits.hits.1._source.nested.1.paragraph_id: 2 } + - not_exists: hits.hits.1._source.nested.2.vector + - match: { hits.hits.1._source.nested.2.paragraph_id: 3 } + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.name: "rabbit.jpg" } + - not_exists: hits.hits.2._source.vector + - not_exists: hits.hits.2._source.sparse_vector + + - match: { hits.hits.3._id: "4" } + - match: { hits.hits.3._source.name: "zoolander.jpg" } + - length: { hits.hits.3._source.nested: 3 } + - not_exists: hits.hits.3._source.nested.0.vector + - not_exists: hits.hits.3._source.nested.0.sparse_vector + - match: { hits.hits.3._source.nested.0.paragraph_id: 0 } + - not_exists: hits.hits.3._source.nested.1.sparse_vector + - match: { hits.hits.3._source.nested.1.paragraph_id: 1 } + - not_exists: hits.hits.3._source.nested.2.vector + - match: { hits.hits.3._source.nested.2.paragraph_id: 2 } + +--- +"include vectors": + - do: + search: + index: test + body: + _source: + exclude_vectors: false + sort: ["name"] + + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.0._source.name: "cow.jpg"} + - exists: hits.hits.0._source.vector + + - match: { hits.hits.1._id: "2"} + - match: { 
hits.hits.1._source.name: "moose.jpg"} + - length: { hits.hits.1._source.nested: 3 } + - exists: hits.hits.1._source.nested.0.vector + - match: { hits.hits.1._source.nested.0.paragraph_id: 0 } + - exists: hits.hits.1._source.nested.1.vector + - match: { hits.hits.1._source.nested.1.paragraph_id: 2 } + - exists: hits.hits.1._source.nested.2.vector + - match: { hits.hits.1._source.nested.2.paragraph_id: 3 } + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.name: "rabbit.jpg" } + - exists: hits.hits.2._source.vector + - exists: hits.hits.2._source.sparse_vector + + - match: { hits.hits.3._id: "4" } + - match: { hits.hits.3._source.name: "zoolander.jpg" } + - length: { hits.hits.3._source.nested: 3 } + - exists: hits.hits.3._source.nested.0.vector + - exists: hits.hits.3._source.nested.0.sparse_vector + - match: { hits.hits.3._source.nested.0.paragraph_id: 0 } + - exists: hits.hits.3._source.nested.1.sparse_vector + - match: { hits.hits.3._source.nested.1.paragraph_id: 1 } + - exists: hits.hits.3._source.nested.2.vector + - match: { hits.hits.3._source.nested.2.paragraph_id: 2 } + +--- +"exclude vectors with fields": + - do: + search: + index: test + body: + _source: + exclude_vectors: true + sort: ["name"] + fields: [vector, sparse_vector, nested.*] + + - match: { hits.hits.0._id: "1"} + - match: { hits.hits.0._source.name: "cow.jpg"} + - not_exists: hits.hits.0._source.vector + - exists: hits.hits.0.fields.vector + + - match: { hits.hits.1._id: "2"} + - match: { hits.hits.1._source.name: "moose.jpg"} + - length: { hits.hits.1._source.nested: 3 } + - not_exists: hits.hits.1._source.nested.0.vector + - match: { hits.hits.1._source.nested.0.paragraph_id: 0 } + - not_exists: hits.hits.1._source.nested.1.vector + - match: { hits.hits.1._source.nested.1.paragraph_id: 2 } + - not_exists: hits.hits.1._source.nested.2.vector + - match: { hits.hits.1._source.nested.2.paragraph_id: 3 } + + - match: { hits.hits.2._id: "3" } + - match: { 
hits.hits.2._source.name: "rabbit.jpg" } + - not_exists: hits.hits.2._source.vector + - exists: hits.hits.2.fields.vector + - not_exists: hits.hits.2._source.sparse_vector + - exists: hits.hits.2.fields.sparse_vector + + + - match: { hits.hits.3._id: "4" } + - match: { hits.hits.3._source.name: "zoolander.jpg" } + - length: { hits.hits.3._source.nested: 3 } + - not_exists: hits.hits.3._source.nested.0.vector + - exists: hits.hits.3.fields.nested.0.vector + - not_exists: hits.hits.3._source.nested.0.sparse_vector + - match: { hits.hits.3._source.nested.0.paragraph_id: 0 } + - exists: hits.hits.3.fields.nested.0.sparse_vector + - not_exists: hits.hits.3._source.nested.1.sparse_vector + - match: { hits.hits.3._source.nested.1.paragraph_id: 1 } + - exists: hits.hits.3.fields.nested.1.sparse_vector + - not_exists: hits.hits.3._source.nested.2.vector + - match: { hits.hits.3._source.nested.2.paragraph_id: 2 } + - exists: hits.hits.3.fields.nested.2.vector diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 87ae0f2ae0fa0..088ae350beb4e 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -236,6 +236,8 @@ static TransportVersion def(int id) { public static final TransportVersion ILM_ADD_SKIP_SETTING_8_19 = def(8_841_0_43); public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44); public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45); + public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46); + /* * STOP! READ THIS FIRST! 
No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 46eaf7b045cea..ed750f4515473 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -199,6 +199,15 @@ public boolean isDimension() { return false; } + /** + * Vector embeddings are typically large and not intended for human consumption, so such fields may be excluded from responses. + * + * @return true if this field contains vector embeddings. + */ + public boolean isVectorEmbedding() { + return false; + } + /** * @return true if field has script values. */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 87254a8fc384c..0e12fc9af9243 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -2113,6 +2113,11 @@ public boolean isAggregatable() { return false; } + @Override + public boolean isVectorEmbedding() { + return true; + } + @Override public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) { return elementType.fielddataBuilder(this, fieldDataContext); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java index 025a5c5ae2b72..bb81fc3466a26 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java @@ -121,6 
+121,11 @@ public String typeName() { return CONTENT_TYPE; } + @Override + public boolean isVectorEmbedding() { + return true; + } + @Override public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) { throw new IllegalArgumentException("[sparse_vector] fields do not support sorting, scripting or aggregating"); diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index 23dac0fad43a6..9dc25fa59f7d6 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -46,8 +46,8 @@ private SearchCapabilities() {} private static final String INDEX_SELECTOR_SYNTAX = "index_expression_selectors"; private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub"; - private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields"; + private static final String EXCLUDE_VECTORS_PARAM = "exclude_vectors_param"; public static final Set CAPABILITIES; static { @@ -67,6 +67,7 @@ private SearchCapabilities() {} capabilities.add(INDEX_SELECTOR_SYNTAX); capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB); capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS); + capabilities.add(EXCLUDE_VECTORS_PARAM); CAPABILITIES = Set.copyOf(capabilities); } } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java index 4bddd6ec2e906..7f1aa9fcf13c8 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchContext.java @@ -69,7 +69,9 @@ private static FetchSourceContext buildFetchSourceContext(SearchContext in) { if (sfc != null && sfc.fetchFields()) { 
for (String field : sfc.fieldNames()) { if (SourceFieldMapper.NAME.equals(field)) { - fsc = fsc == null ? FetchSourceContext.of(true) : FetchSourceContext.of(true, fsc.includes(), fsc.excludes()); + fsc = fsc == null + ? FetchSourceContext.of(true) + : FetchSourceContext.of(true, fsc.excludeVectors(), fsc.includes(), fsc.excludes()); } } } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 47d544c748b3f..f750a83d8f61d 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -13,9 +13,12 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.TotalHits; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.IdLoader; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.search.LeafNestedDocuments; import org.elasticsearch.search.NestedDocuments; @@ -24,10 +27,12 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; +import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.search.fetch.subphase.InnerHitsContext; import org.elasticsearch.search.fetch.subphase.InnerHitsPhase; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.lookup.Source; +import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.search.lookup.SourceProvider; import org.elasticsearch.search.profile.ProfileResult; import 
org.elasticsearch.search.profile.Profilers;
@@ -44,6 +49,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.function.Supplier;
+import java.util.stream.Collectors;
 
 /**
  * Fetch phase of a search request, used to fetch the actual top matching documents to be returned to the client, identified
@@ -110,7 +116,13 @@ public Source getSource(LeafReaderContext ctx, int doc) {
     }
 
     private SearchHits buildSearchHits(SearchContext context, int[] docIdsToLoad, Profiler profiler, RankDocShardInfo rankDocs) {
-        SourceLoader sourceLoader = context.newSourceLoader(null);
+        // Optionally remove sparse and dense vector fields early to:
+        // - Reduce the in-memory size of the source
+        // - Speed up retrieval of the synthetic source
+        // Note: These vectors will no longer be accessible via _source for any sub-fetch processors,
+        // but they are typically accessed through doc values instead (e.g. by the rescorer).
+        SourceFilter sourceFilter = maybeExcludeNonSemanticTextVectorFields(context);
+        SourceLoader sourceLoader = context.newSourceLoader(sourceFilter);
         FetchContext fetchContext = new FetchContext(context, sourceLoader);
 
         PreloadedSourceProvider sourceProvider = new PreloadedSourceProvider();
@@ -421,4 +433,83 @@ public String toString() {
             }
         };
     }
+
+    /**
+     * Determines whether vector fields should be excluded from the source based on the {@link FetchSourceContext}.
+     * Returns {@code true} only if {@code exclude_vectors} is explicitly set to {@code true}; a missing
+     * {@link FetchSourceContext} or an unset flag both mean that vectors are kept.
+     */
+    private static boolean shouldExcludeVectorsFromSource(SearchContext context) {
+        FetchSourceContext fetchSourceContext = context.fetchSourceContext();
+        return fetchSourceContext != null && Boolean.TRUE.equals(fetchSourceContext.excludeVectors());
+    }
+
+    /**
+     * Returns a {@link SourceFilter} that removes vector fields from the loaded source, or {@code null} when nothing
+     * has to be filtered (vectors were not asked to be excluded, or no eligible vector field is mapped).
+     * <p>
+     * Vector fields are split into three groups:
+     * <ul>
+     *   <li>fields matched by the {@code fields} option are kept in the loaded source so that sub-fetch phases can
+     *   still read them, and are instead appended to the excludes of the {@link FetchSourceContext} so that they are
+     *   stripped from the returned {@code _source};</li>
+     *   <li>fields that belong to an inference (semantic text) field are left untouched, as they are processed
+     *   separately;</li>
+     *   <li>every other vector field is excluded early through the returned filter.</li>
+     * </ul>
+     * Note that this method replaces the {@link FetchSourceContext} of the given context when the first group is
+     * not empty.
+     */
+    private static SourceFilter maybeExcludeNonSemanticTextVectorFields(SearchContext context) {
+        if (shouldExcludeVectorsFromSource(context) == false) {
+            return null;
+        }
+        // Never null here: shouldExcludeVectorsFromSource returns false when there is no fetch source context.
+        var fetchSourceContext = context.fetchSourceContext();
+        var lookup = context.getSearchExecutionContext().getMappingLookup();
+        var fetchFieldsAut = context.fetchFieldsContext() != null && context.fetchFieldsContext().fields().size() > 0
+            ? new CharacterRunAutomaton(
+                Regex.simpleMatchToAutomaton(context.fetchFieldsContext().fields().stream().map(f -> f.field).toArray(String[]::new))
+            )
+            : null;
+        var inferenceFieldsAut = lookup.inferenceFields().size() > 0
+            ? new CharacterRunAutomaton(
+                Regex.simpleMatchToAutomaton(lookup.inferenceFields().keySet().stream().map(f -> f + "*").toArray(String[]::new))
+            )
+            : null;
+
+        // Partition the vector fields on whether they are requested through the `fields` option, without relying on
+        // side effects inside the stream pipeline.
+        Map<Boolean, List<String>> vectorFields = lookup.getFullNameToFieldType()
+            .values()
+            .stream()
+            .filter(MappedFieldType::isVectorEmbedding)
+            .map(MappedFieldType::name)
+            .collect(Collectors.partitioningBy(name -> fetchFieldsAut != null && fetchFieldsAut.run(name)));
+
+        // Vectors from semantic text fields are processed separately, so they are never excluded early.
+        List<String> excludes = vectorFields.get(false)
+            .stream()
+            .filter(name -> inferenceFieldsAut == null || inferenceFieldsAut.run(name) == false)
+            .collect(Collectors.toList());
+
+        List<String> lateExcludes = new ArrayList<>(vectorFields.get(true));
+        if (lateExcludes.isEmpty() == false) {
+            // Vector fields requested through the `fields` option must stay available to the sub-fetch phases, so they
+            // are added to the excludes of the fetch source context and only removed when the returned _source is built.
+            // excludes() is never null: FetchSourceContext normalizes a missing array to an empty one.
+            for (var exclude : fetchSourceContext.excludes()) {
+                lateExcludes.add(exclude);
+            }
+            context.fetchSourceContext(
+                FetchSourceContext.of(
+                    fetchSourceContext.fetchSource(),
+                    fetchSourceContext.excludeVectors(),
+                    fetchSourceContext.includes(),
+                    lateExcludes.toArray(String[]::new)
+                )
+            );
+        }
+        return excludes.isEmpty() ? null : new SourceFilter(new String[] {}, excludes.toArray(String[]::new));
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java
index 0594fa4909783..943a698f756a2 100644
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourceContext.java
@@ -9,6 +9,8 @@
 
 package org.elasticsearch.search.fetch.subphase;
 
+import org.elasticsearch.TransportVersion;
+import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
@@ -27,56 +29,85 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Objects;
 
 /**
  * Context used to fetch the {@code _source}.
*/ public class FetchSourceContext implements Writeable, ToXContentObject { - + public static final ParseField EXCLUDE_VECTORS_FIELD = new ParseField("exclude_vectors"); public static final ParseField INCLUDES_FIELD = new ParseField("includes", "include"); public static final ParseField EXCLUDES_FIELD = new ParseField("excludes", "exclude"); - public static final FetchSourceContext FETCH_SOURCE = new FetchSourceContext(true, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); - public static final FetchSourceContext DO_NOT_FETCH_SOURCE = new FetchSourceContext(false, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); + public static final FetchSourceContext FETCH_SOURCE = new FetchSourceContext(true, null, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY); + public static final FetchSourceContext DO_NOT_FETCH_SOURCE = new FetchSourceContext( + false, + null, + Strings.EMPTY_ARRAY, + Strings.EMPTY_ARRAY + ); private final boolean fetchSource; private final String[] includes; private final String[] excludes; + private final Boolean excludeVectors; public static FetchSourceContext of(boolean fetchSource) { return fetchSource ? 
FETCH_SOURCE : DO_NOT_FETCH_SOURCE; } public static FetchSourceContext of(boolean fetchSource, @Nullable String[] includes, @Nullable String[] excludes) { - if ((includes == null || includes.length == 0) && (excludes == null || excludes.length == 0)) { + return of(fetchSource, null, includes, excludes); + } + + public static FetchSourceContext of( + boolean fetchSource, + Boolean excludeVectors, + @Nullable String[] includes, + @Nullable String[] excludes + ) { + if (excludeVectors == null && (includes == null || includes.length == 0) && (excludes == null || excludes.length == 0)) { return of(fetchSource); } - return new FetchSourceContext(fetchSource, includes, excludes); + return new FetchSourceContext(fetchSource, excludeVectors, includes, excludes); + } + + private FetchSourceContext(boolean fetchSource, Boolean excludeVectors, @Nullable String[] includes, @Nullable String[] excludes) { + this.fetchSource = fetchSource; + this.excludeVectors = excludeVectors; + this.includes = includes == null ? Strings.EMPTY_ARRAY : includes; + this.excludes = excludes == null ? Strings.EMPTY_ARRAY : excludes; } public static FetchSourceContext readFrom(StreamInput in) throws IOException { final boolean fetchSource = in.readBoolean(); + final Boolean excludeVectors = isVersionCompatibleWithExcludeVectors(in.getTransportVersion()) ? in.readOptionalBoolean() : null; final String[] includes = in.readStringArray(); final String[] excludes = in.readStringArray(); - return of(fetchSource, includes, excludes); - } - - private FetchSourceContext(boolean fetchSource, @Nullable String[] includes, @Nullable String[] excludes) { - this.fetchSource = fetchSource; - this.includes = includes == null ? Strings.EMPTY_ARRAY : includes; - this.excludes = excludes == null ? 
Strings.EMPTY_ARRAY : excludes; + return of(fetchSource, excludeVectors, includes, excludes); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(fetchSource); + if (isVersionCompatibleWithExcludeVectors(out.getTransportVersion())) { + out.writeOptionalBoolean(excludeVectors); + } out.writeStringArray(includes); out.writeStringArray(excludes); } + private static boolean isVersionCompatibleWithExcludeVectors(TransportVersion version) { + return version.isPatchFrom(TransportVersions.SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19); + } + public boolean fetchSource() { return this.fetchSource; } + public Boolean excludeVectors() { + return this.excludeVectors; + } + public String[] includes() { return this.includes; } @@ -135,6 +166,7 @@ public static FetchSourceContext fromXContent(XContentParser parser) throws IOEx XContentParser.Token token = parser.currentToken(); boolean fetchSource = true; + Boolean excludeVectors = null; String[] includes = Strings.EMPTY_ARRAY; String[] excludes = Strings.EMPTY_ARRAY; if (token == XContentParser.Token.VALUE_BOOLEAN) { @@ -169,6 +201,18 @@ public static FetchSourceContext fromXContent(XContentParser parser) throws IOEx includes = new String[] { parser.text() }; } else if (EXCLUDES_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { excludes = new String[] { parser.text() }; + } else if (EXCLUDE_VECTORS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + excludeVectors = parser.booleanValue(); + } else { + throw new ParsingException( + parser.getTokenLocation(), + "Unknown key for a " + token + " in [" + currentFieldName + "].", + parser.getTokenLocation() + ); + } + } else if (token == XContentParser.Token.VALUE_BOOLEAN) { + if (EXCLUDE_VECTORS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + excludeVectors = parser.booleanValue(); } else { throw new ParsingException( parser.getTokenLocation(), @@ -201,7 +245,7 @@ public static FetchSourceContext 
fromXContent(XContentParser parser) throws IOEx
                 parser.getTokenLocation()
             );
         }
-        return FetchSourceContext.of(fetchSource, includes, excludes);
+        return FetchSourceContext.of(fetchSource, excludeVectors, includes, excludes);
     }
 
     private static String[] parseStringArray(XContentParser parser, String currentFieldName) throws IOException {
@@ -227,6 +271,9 @@ private static String[] parseStringArray(XContentParser parser, String currentFi
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         if (fetchSource) {
             builder.startObject();
+            if (excludeVectors != null) {
+                builder.field(EXCLUDE_VECTORS_FIELD.getPreferredName(), excludeVectors);
+            }
             builder.array(INCLUDES_FIELD.getPreferredName(), includes);
             builder.array(EXCLUDES_FIELD.getPreferredName(), excludes);
             builder.endObject();
@@ -244,6 +291,7 @@ public boolean equals(Object o) {
         FetchSourceContext that = (FetchSourceContext) o;
 
         if (fetchSource != that.fetchSource) return false;
+        if (Objects.equals(excludeVectors, that.excludeVectors) == false) return false; // nullable Boolean: compare by value
         if (Arrays.equals(excludes, that.excludes) == false) return false;
         if (Arrays.equals(includes, that.includes) == false) return false;
 
@@ -252,7 +300,7 @@ public boolean equals(Object o) {
 
     @Override
     public int hashCode() {
-        int result = (fetchSource ? 
1 : 0); + int result = Objects.hash(fetchSource, excludeVectors); result = 31 * result + Arrays.hashCode(includes); result = 31 * result + Arrays.hashCode(excludes); return result; diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourceContextTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourceContextTests.java index 3234f5a638680..e19567addb39f 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourceContextTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourceContextTests.java @@ -35,6 +35,7 @@ protected Writeable.Reader instanceReader() { protected FetchSourceContext createTestInstance() { return FetchSourceContext.of( true, + randomBoolean() ? null : randomBoolean(), randomArray(0, 5, String[]::new, () -> randomAlphaOfLength(5)), randomArray(0, 5, String[]::new, () -> randomAlphaOfLength(5)) ); @@ -42,7 +43,27 @@ protected FetchSourceContext createTestInstance() { @Override protected FetchSourceContext mutateInstance(FetchSourceContext instance) { - return null;// TODO implement https://github.com/elastic/elasticsearch/issues/25929 + return switch (randomInt(2)) { + case 0 -> FetchSourceContext.of( + true, + instance.excludeVectors() != null ? 
instance.excludeVectors() == false : randomBoolean(), + instance.includes(), + instance.excludes() + ); + case 1 -> FetchSourceContext.of( + true, + instance.excludeVectors(), + randomArray(instance.includes().length + 1, instance.includes().length + 5, String[]::new, () -> randomAlphaOfLength(5)), + instance.excludes() + ); + case 2 -> FetchSourceContext.of( + true, + instance.excludeVectors(), + instance.includes(), + randomArray(instance.excludes().length + 1, instance.excludes().length + 5, String[]::new, () -> randomAlphaOfLength(5)) + ); + default -> throw new AssertionError("cannot reach"); + }; } public void testFromXContentException() throws IOException { diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java index 8f2e74f3bc130..7c536640f1f95 100644 --- a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java +++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java @@ -172,6 +172,11 @@ public String typeName() { return CONTENT_TYPE; } + @Override + public boolean isVectorEmbedding() { + return true; + } + @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { if (format != null) {