From c24bc8e69cbd0857bb7bbdd2805e49428619b67d Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 7 Jan 2025 19:27:06 +0000 Subject: [PATCH] Semantic text with legacy format cannot store term vectors (#119690) This change fixes the store option of the underlying sparse vector field for semantic text. For legacy indices, the store option should remain false, since changing this option on an existing index is not allowed and the term vectors are already stored in _source. --- .../inference/mapper/SemanticTextFieldMapper.java | 10 +++++++--- .../inference/mapper/SemanticTextFieldMapperTests.java | 6 +++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 690a136c566e0..9126fd78cada3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -858,7 +858,7 @@ private static NestedObjectMapper.Builder createChunksField( ); chunksField.dynamic(ObjectMapper.Dynamic.FALSE); if (modelSettings != null) { - chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings)); + chunksField.add(createEmbeddingsField(indexSettings.getIndexVersionCreated(), modelSettings, useLegacyFormat)); } if (useLegacyFormat) { var chunkTextField = new KeywordFieldMapper.Builder(TEXT_FIELD, indexVersionCreated).indexed(false).docValues(false); @@ -869,9 +869,13 @@ private static NestedObjectMapper.Builder createChunksField( return chunksField; } - private static Mapper.Builder createEmbeddingsField(IndexVersion indexVersionCreated, SemanticTextField.ModelSettings modelSettings) { + private static Mapper.Builder createEmbeddingsField( + IndexVersion 
indexVersionCreated, + SemanticTextField.ModelSettings modelSettings, + boolean useLegacyFormat + ) { return switch (modelSettings.taskType()) { - case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD).setStored(true); + case SPARSE_EMBEDDING -> new SparseVectorFieldMapper.Builder(CHUNKED_EMBEDDINGS_FIELD).setStored(useLegacyFormat == false); case TEXT_EMBEDDING -> { DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index ddc697881eccb..857038439b0b5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -531,7 +531,11 @@ private static void assertSemanticTextField(MapperService mapperService, String assertTrue(embeddingsFieldMapper.fieldType() == mapperService.mappingLookup().getFieldType(getEmbeddingsFieldName(fieldName))); assertThat(embeddingsMapper.fullPath(), equalTo(getEmbeddingsFieldName(fieldName))); switch (semanticFieldMapper.fieldType().getModelSettings().taskType()) { - case SPARSE_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(SparseVectorFieldMapper.class)); + case SPARSE_EMBEDDING -> { + assertThat(embeddingsMapper, instanceOf(SparseVectorFieldMapper.class)); + SparseVectorFieldMapper sparseMapper = (SparseVectorFieldMapper) embeddingsMapper; + assertEquals(sparseMapper.fieldType().isStored(), semanticTextFieldType.useLegacyFormat() == false); + } case TEXT_EMBEDDING -> assertThat(embeddingsMapper, instanceOf(DenseVectorFieldMapper.class)); default -> throw new AssertionError("Invalid task type"); }