Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
d0ba2d0
Initial POC
kderusso Aug 15, 2023
9eb75bf
Comments
kderusso Aug 15, 2023
b057810
Cleanup
kderusso Aug 16, 2023
6ae3a7a
Cleanup
kderusso Aug 16, 2023
280a234
Typo
kderusso Aug 16, 2023
5c1c1af
Spotless
kderusso Aug 16, 2023
d2855a1
Initial yaml test
kderusso Aug 16, 2023
4034f13
Add more yaml test cases
kderusso Aug 17, 2023
736378e
Fix typo in yaml
kderusso Aug 17, 2023
1588021
Fix another typo in test
kderusso Aug 17, 2023
e0021f5
Trying to make sure index and dims are returned in mappings too
kderusso Aug 17, 2023
4c8fdaa
Prepare for rebase
kderusso Aug 17, 2023
d5b0894
Fix compile error
kderusso Aug 18, 2023
6d4b143
Go back to non testing value
kderusso Aug 18, 2023
a237dda
Update DynamicMappingTests
kderusso Aug 18, 2023
20ce7ea
Make dims param updateable
kderusso Aug 21, 2023
4a5aa54
Cleanup
kderusso Aug 21, 2023
37cc28f
Spotless
kderusso Aug 21, 2023
2de801a
Fix some tests
kderusso Aug 21, 2023
4ddc64a
Fix yaml test
kderusso Aug 22, 2023
05cc248
Fix dims exception text
kderusso Aug 22, 2023
9e508e6
Update docs/changelog/98512.yaml
kderusso Aug 22, 2023
09a3a96
Update changelog entry
kderusso Aug 22, 2023
f5353cc
Version check before post processing dynamic mappers
kderusso Aug 22, 2023
42e79fd
Spotless
kderusso Aug 22, 2023
b1c8c51
Add additional yaml test cases
kderusso Aug 22, 2023
0f89350
Add one more case
kderusso Aug 22, 2023
c1370c6
Exclude fields mapped from templates from being dynamically mapped as…
kderusso Aug 23, 2023
f17082c
Make param final
kderusso Aug 23, 2023
0007fc3
PR feedback
kderusso Aug 24, 2023
f4b22bc
Move postProcessDynamicMappers from context to live in DocumentParser…
kderusso Aug 24, 2023
5d3dfd0
Spotless
kderusso Aug 24, 2023
115b6d5
Update dense_vector mapping docs
kderusso Aug 28, 2023
ff95ff9
Dynamically map dims for all use cases
kderusso Aug 28, 2023
756feb0
Whitespace
kderusso Aug 28, 2023
ebd9b44
Add subobject test
kderusso Aug 30, 2023
88f2bfb
Refactor dynamicMappers to be a Map<String,List<Mapper>>
kderusso Aug 30, 2023
f8cf0d3
Fix timeout
kderusso Aug 31, 2023
b814adb
Cleanup
kderusso Aug 31, 2023
2b42034
Spotless
kderusso Aug 31, 2023
e0fa759
Cleanup
kderusso Aug 31, 2023
1cccc2d
Committing test authored by @benwtrent
kderusso Aug 31, 2023
676d343
Committing working subobject before cleanup
kderusso Sep 6, 2023
ad0be33
Cleanup
kderusso Sep 6, 2023
71918af
Spotless
kderusso Sep 6, 2023
8934f8e
Fix typos in yaml test
kderusso Sep 6, 2023
84676ad
PR feedback
kderusso Sep 6, 2023
49f4577
PR feedback
kderusso Sep 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/98512.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 98512
summary: Automatically map float arrays of lengths 128 - 2048 as dense_vector
area: Application
type: feature
issues:
- 97532
8 changes: 6 additions & 2 deletions docs/reference/mapping/types/dense-vector.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ In many cases, a brute-force kNN search is not efficient enough. For this
reason, the `dense_vector` type supports indexing vectors into a specialized
data structure to support fast kNN retrieval through the <<search-api-knn, `knn` option>> in the search API

Unmapped array fields of float elements with size between 128 and 2048 are dynamically mapped as `dense_vector` with a default similarity of `cosine`.
You can override the default similarity by explicitly mapping the field as `dense_vector` with the desired similarity.

Indexing is enabled by default for dense vector fields.
When indexing is enabled, you can define the vector similarity to use in kNN search:

Expand Down Expand Up @@ -128,8 +131,9 @@ trade off of lower precision. Vectors using `byte` require dimensions with
integer values between -128 to 127, inclusive for both indexing and searching.

`dims`::
(Required, integer)
Number of vector dimensions. Can't exceed `2048`.
(Optional, integer)
Number of vector dimensions. Can't exceed `2048`. If `dims` is not specified,
it will be set to the length of the first vector added to the field.

`index`::
(Optional, Boolean)
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@

import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_NESTED_FIELDS_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
import static org.hamcrest.Matchers.containsString;
Expand Down Expand Up @@ -662,4 +663,32 @@ public void testSubobjectsFalse() throws Exception {
assertNotNull(properties.get("time.max"));
});
}

/**
 * Smoke test for indexing float arrays underneath object fields.
 * <p>
 * The index is created with two object fields that both allow dynamic mapping: {@code obj}, which has no
 * sub-fields, and {@code mapped_obj}, which explicitly maps {@code vector} as {@code dense_vector} without
 * specifying {@code dims}. The test makes no assertions on the resulting mappings; it only requires that both
 * index requests complete.
 */
public void testKnnSubObject() throws Exception {
assertAcked(indicesAdmin().prepareCreate("test").setMapping("""
{
"properties": {
"obj": {
"type": "object",
"dynamic": "true"
},
"mapped_obj": {
"type": "object",
"dynamic": "true",
"properties": {
"vector": {
"type": "dense_vector"
}
}
}
}
}""").get());

// Explicitly mapped dense_vector sub-field: index a short array of three random values.
client().index(new IndexRequest("test").source("mapped_obj.vector", Randomness.get().doubles(3, 0.0, 5.0).toArray())).get();

// Unmapped sub-field of a dynamic object: index an array with exactly MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING elements.
client().index(
new IndexRequest("test").source("obj.vector", Randomness.get().doubles(MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING, 0.0, 5.0).toArray())
).get();

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.plugins.internal.DocumentParsingObserver;
Expand All @@ -40,11 +41,16 @@
import java.util.function.Consumer;
import java.util.function.Supplier;

import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING;

/**
* A parser for documents
*/
public final class DocumentParser {

public static final IndexVersion DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION = IndexVersion.V_8_11_0;

private final XContentParserConfiguration parserConfiguration;
private final Supplier<DocumentParsingObserver> documentParsingObserverSupplier;
private final MappingParserContext mappingParserContext;
Expand Down Expand Up @@ -244,9 +250,8 @@ static Mapping createDynamicUpdate(DocumentParserContext context) {
return null;
}
RootObjectMapper.Builder rootBuilder = context.updateRoot();
for (Mapper mapper : context.getDynamicMappers()) {
rootBuilder.addDynamic(mapper.name(), null, mapper, context);
}
context.getDynamicMappers().forEach(mapper -> rootBuilder.addDynamic(mapper.name(), null, mapper, context));

for (RuntimeField runtimeField : context.getDynamicRuntimeFields()) {
rootBuilder.addRuntimeField(runtimeField);
}
Expand Down Expand Up @@ -588,6 +593,33 @@ private static void parseNonDynamicArray(DocumentParserContext context, final St
parseValue(context, lastFieldName);
}
}
postProcessDynamicArrayMapping(context, lastFieldName);
}

/**
 * Re-maps a dynamically mapped array as a single {@code dense_vector} field when it qualifies.
 * <p>
 * Nothing happens unless the index was created on or after {@link #DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION}.
 * The dynamic mappers tracked under the field's full name are replaced only when all of the following hold:
 * <ul>
 *   <li>dynamic mappers exist for the field,</li>
 *   <li>the field was neither mapped from a dynamic template nor marked as a copy_to field,</li>
 *   <li>the number of mappers lies between {@code MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING} and {@code MAX_DIMS_COUNT}, inclusive,</li>
 *   <li>every mapper is a {@link NumberFieldMapper} whose type name is {@code float}.</li>
 * </ul>
 *
 * @param context   the parser context holding the dynamic mappers collected so far
 * @param fieldName the leaf name of the array field that has just been parsed
 */
private static void postProcessDynamicArrayMapping(DocumentParserContext context, String fieldName) {
    if (context.indexSettings().getIndexVersionCreated().onOrAfter(DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION) == false) {
        return;
    }

    final MapperBuilderContext dynamicBuilderContext = context.createDynamicMapperBuilderContext();
    final String fullPath = dynamicBuilderContext.buildFullName(fieldName);
    final List<Mapper> candidates = context.getDynamicMappers(fullPath);

    // Fields without dynamic mappers, or whose mapping came from a template or copy_to, are left untouched.
    if (candidates == null || context.isFieldAppliedFromTemplate(fullPath) || context.isCopyToField(fullPath)) {
        return;
    }

    // The element count must fall inside the range accepted for dynamic dense_vector mapping.
    final int elementCount = candidates.size();
    if (elementCount < MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING || elementCount > MAX_DIMS_COUNT) {
        return;
    }

    // Every element must have been dynamically classified as a float number.
    for (Mapper candidate : candidates) {
        if ((candidate instanceof NumberFieldMapper && "float".equals(candidate.typeName())) == false) {
            return;
        }
    }

    final DenseVectorFieldMapper denseVector = new DenseVectorFieldMapper.Builder(
        fieldName,
        context.indexSettings().getIndexVersionCreated()
    ).build(dynamicBuilderContext);
    context.updateDynamicMappers(fullPath, List.of(denseVector));
}

private static void throwEOFOnParseArray(String arrayFieldName, DocumentParserContext context) {
Expand Down Expand Up @@ -677,6 +709,7 @@ private static void parseCopyFields(DocumentParserContext context, List<String>
assert targetDoc != null;
final DocumentParserContext copyToContext = context.createCopyToContext(field, targetDoc);
innerParseObject(copyToContext);
context.markFieldAsCopyTo(field);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ protected void addDoc(LuceneDocument doc) {
private final MappingParserContext mappingParserContext;
private final SourceToParse sourceToParse;
private final Set<String> ignoredFields;
private final List<Mapper> dynamicMappers;
private final Map<String, List<Mapper>> dynamicMappers;
private final Set<String> newFieldsSeen;
private final Map<String, ObjectMapper> dynamicObjectMappers;
private final List<RuntimeField> dynamicRuntimeFields;
Expand All @@ -94,13 +94,15 @@ protected void addDoc(LuceneDocument doc) {
private String id;
private Field version;
private SeqNoFieldMapper.SequenceIDFields seqID;
private final Set<String> fieldsAppliedFromTemplates;
private final Set<String> copyToFields;

private DocumentParserContext(
MappingLookup mappingLookup,
MappingParserContext mappingParserContext,
SourceToParse sourceToParse,
Set<String> ignoreFields,
List<Mapper> dynamicMappers,
Map<String, List<Mapper>> dynamicMappers,
Set<String> newFieldsSeen,
Map<String, ObjectMapper> dynamicObjectMappers,
List<RuntimeField> dynamicRuntimeFields,
Expand All @@ -109,7 +111,9 @@ private DocumentParserContext(
SeqNoFieldMapper.SequenceIDFields seqID,
DocumentDimensions dimensions,
ObjectMapper parent,
ObjectMapper.Dynamic dynamic
ObjectMapper.Dynamic dynamic,
Set<String> fieldsAppliedFromTemplates,
Set<String> copyToFields
) {
this.mappingLookup = mappingLookup;
this.mappingParserContext = mappingParserContext;
Expand All @@ -125,6 +129,8 @@ private DocumentParserContext(
this.dimensions = dimensions;
this.parent = parent;
this.dynamic = dynamic;
this.fieldsAppliedFromTemplates = fieldsAppliedFromTemplates;
this.copyToFields = copyToFields;
}

private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, DocumentParserContext in) {
Expand All @@ -142,7 +148,9 @@ private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic,
in.seqID,
in.dimensions,
parent,
dynamic
dynamic,
in.fieldsAppliedFromTemplates,
in.copyToFields
);
}

Expand All @@ -158,7 +166,7 @@ protected DocumentParserContext(
mappingParserContext,
source,
new HashSet<>(),
new ArrayList<>(),
new HashMap<>(),
new HashSet<>(),
new HashMap<>(),
new ArrayList<>(),
Expand All @@ -167,7 +175,9 @@ protected DocumentParserContext(
null,
DocumentDimensions.fromIndexSettings(mappingParserContext.getIndexSettings()),
parent,
dynamic
dynamic,
new HashSet<>(),
new HashSet<>()
);
}

Expand Down Expand Up @@ -275,6 +285,22 @@ public ObjectMapper.Dynamic dynamic() {
return dynamic;
}

/**
 * Records that the mapping for the given field was produced by a matching dynamic template.
 * The backing set is handed on to contexts derived from this one, so the mark is visible to them as well.
 * NOTE(review): {@link #isFieldAppliedFromTemplate(String)} is queried with the full dotted field name when
 * post-processing arrays; confirm callers pass the same form of name here.
 *
 * @param fieldName name of the field whose mapping came from a dynamic template
 */
public void markFieldAsAppliedFromTemplate(String fieldName) {
fieldsAppliedFromTemplates.add(fieldName);
}

/**
 * Tells whether the given field was previously recorded via {@link #markFieldAsAppliedFromTemplate(String)}.
 *
 * @param name field name to look up; must be in the same form that was used when marking
 * @return {@code true} if the field was marked as mapped from a dynamic template
 */
public boolean isFieldAppliedFromTemplate(String name) {
return fieldsAppliedFromTemplates.contains(name);
}

/**
 * Records that the given field has been the target of a copy_to while parsing the current document.
 * The backing set is handed on to contexts derived from this one, so the mark is visible to them as well.
 *
 * @param fieldName name of the copy_to target field
 */
public void markFieldAsCopyTo(String fieldName) {
copyToFields.add(fieldName);
}

/**
 * Tells whether the given field was previously recorded via {@link #markFieldAsCopyTo(String)}.
 *
 * @param name field name to look up; must be in the same form that was used when marking
 * @return {@code true} if the field was marked as a copy_to target
 */
public boolean isCopyToField(String name) {
return copyToFields.contains(name);
}

/**
* Add a new mapper dynamically created while parsing.
*/
Expand All @@ -283,6 +309,7 @@ public final void addDynamicMapper(Mapper mapper) {
if (mapper instanceof ObjectMapper) {
MappingLookup.checkObjectDepthLimit(indexSettings().getMappingDepthLimit(), mapper.name());
}

// eagerly check field name limit here to avoid OOM errors
// only check fields that are not already mapped or tracked in order to avoid hitting field limit too early via double-counting
// note that existing fields can also receive dynamic mapping updates (e.g. constant_keyword to fix the value)
Expand All @@ -302,23 +329,39 @@ public final void addDynamicMapper(Mapper mapper) {
addDynamicMapper(submapper);
}
}

// TODO we may want to stop adding object mappers to the dynamic mappers list: most times they will be mapped when parsing their
// sub-fields (see ObjectMapper.Builder#addDynamic), which causes extra work as the two variants of the same object field
// will be merged together when creating the final dynamic update. The only cases where object fields need extra treatment are
// dynamically mapped objects when the incoming document defines no sub-fields in them:
// 1) by default, they would be empty containers in the mappings, is it then important to map them?
// 2) they can be the result of applying a dynamic template which may define sub-fields or set dynamic, enabled or subobjects.
dynamicMappers.add(mapper);
dynamicMappers.computeIfAbsent(mapper.name(), k -> new ArrayList<>()).add(mapper);
}

/**
* Get dynamic mappers created as a result of parsing an incoming document. Responsible for exposing all the newly created
* fields that need to be merged into the existing mappings. Used to create the required mapping update at the end of document parsing.
* Consists of a flat set of {@link Mapper}s that will need to be added to their respective parent {@link ObjectMapper}s in order
* Consists of all {@link Mapper}s that will need to be added to their respective parent {@link ObjectMapper}s in order
* to become part of the resulting dynamic mapping update.
*/
public final List<Mapper> getDynamicMappers() {
return dynamicMappers;
return dynamicMappers.values().stream().flatMap(List::stream).toList();
}

/**
 * Returns the dynamic {@link Mapper}s tracked under the given field name while parsing the current document. These
 * mappers will need to be added to their respective parent {@link ObjectMapper}s in order to become part of the
 * resulting dynamic mapping update.
 * @param fieldName Full field name with dot-notation.
 * @return the list of mappers tracked under {@code fieldName}, or {@code null} if none are tracked for it
 */
public final List<Mapper> getDynamicMappers(String fieldName) {
return dynamicMappers.get(fieldName);
}

/**
 * Replaces the dynamic mappers tracked under the given name.
 * <p>
 * Whatever is currently tracked under {@code name} is discarded, then each replacement is registered through
 * {@link #addDynamicMapper(Mapper)}. Each replacement is therefore tracked under its own {@link Mapper#name()},
 * which is not required to equal {@code name}.
 *
 * @param name    full field name whose tracked mappers are dropped
 * @param mappers the mappers to register in their place
 */
public void updateDynamicMappers(String name, List<Mapper> mappers) {
    dynamicMappers.remove(name);
    for (Mapper replacement : mappers) {
        addDynamicMapper(replacement);
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@ private static void createDynamicField(
DateFormatter dateFormatter,
CheckedRunnable<IOException> dynamicFieldStrategy
) throws IOException {
if (applyMatchingTemplate(context, name, matchType, dateFormatter) == false) {
if (applyMatchingTemplate(context, name, matchType, dateFormatter)) {
context.markFieldAsAppliedFromTemplate(name);
} else {
dynamicFieldStrategy.run();
}
}
Expand Down
Loading