diff --git a/docs/changelog/101967.yaml b/docs/changelog/101967.yaml new file mode 100644 index 0000000000000..84f188db1e30b --- /dev/null +++ b/docs/changelog/101967.yaml @@ -0,0 +1,5 @@ +pr: 101967 +summary: "Fix incorrect dynamic mapping for non-numeric-value arrays #101965" +area: Mapping +type: bug +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml index 151698482368a..8453bfbe297e4 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml @@ -2,7 +2,38 @@ setup: - skip: version: ' - 8.10.99' reason: 'Dynamic mapping of floats to dense_vector was added in 8.11' +--- +"Fields indexed as strings won't be transformed into dense_vector": + - skip: + # TODO adjust after backport + version: ' - 8.11.99' + reason: 'Dynamic mapping of things other than numbers is fixed in 8.12' + - do: + index: + index: strings-are-not-floats + refresh: true + body: + obviously_string: ["foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", + "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo"] + - do: + cluster.health: + wait_for_events: languid + - do: + indices.get_mapping: + index: strings-are-not-floats + - match: { strings-are-not-floats.mappings.properties.obviously_string.type: text } --- "Fields with float arrays below the threshold still map as float": diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 996c6243064e9..17af6259ca27c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -613,8 +613,12 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context || context.isCopyToField(fullFieldName) || mappers.size() < MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING || mappers.size() > MAX_DIMS_COUNT + // Anything that is NOT a number or anything that IS a number but not mapped to `float` should NOT be mapped to dense_vector || mappers.stream() - .allMatch(m -> m instanceof NumberFieldMapper.Builder nb && nb.type != NumberFieldMapper.NumberType.FLOAT)) { + .anyMatch( + m -> m instanceof NumberFieldMapper.Builder == false + || ((NumberFieldMapper.Builder) m).type != NumberFieldMapper.NumberType.FLOAT + )) { return; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java index cd04f81f0f355..4a2d74016615e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DynamicMappingTests.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.time.Instant; +import java.util.stream.Stream; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING; @@ -991,4 +992,15 @@ public void testDefaultDenseVectorMappingsObject() throws IOException { ObjectMapper parent = (ObjectMapper) update.getRoot().getMapper("parent_object"); assertThat(((FieldMapper) parent.getMapper("mapsToDenseVector")).fieldType().typeName(), equalTo("dense_vector")); } + + public void testStringArraysAreText() throws IOException { + DocumentMapper mapper = createDocumentMapper(topMapping(b -> b.field("numeric_detection", true))); + BytesReference source = BytesReference.bytes( + XContentFactory.jsonBuilder().startObject().field("mapsToString", Stream.generate(() -> "foo").limit(129).toArray()).endObject() + ); + ParsedDocument parsedDocument = mapper.parse(new SourceToParse("id", source, XContentType.JSON)); + Mapping update = parsedDocument.dynamicMappingsUpdate(); + assertNotNull(update); + assertThat(((FieldMapper) update.getRoot().getMapper("mapsToString")).fieldType().typeName(), equalTo("text")); + } }