From ee9f532afb5cc8960e2b20129e32e617a64e9cb7 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Wed, 9 Feb 2022 13:34:03 -0800 Subject: [PATCH 1/4] [Bug Fix] Set default space to L2 in KNNIndexShard Sets the default value of space type to L2 in KNNIndexShard. KNNIndexShard is used during warmup to load segments into memory. For indices created in ES 7.1 and 7.4, they will not have this value set because the only space we supported was l2. So, we need to hardcode the defaults here. Signed-off-by: John Mazanec --- src/main/java/org/opensearch/knn/index/KNNIndexShard.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/opensearch/knn/index/KNNIndexShard.java b/src/main/java/org/opensearch/knn/index/KNNIndexShard.java index 668ae2d52c..1476ab4bf3 100644 --- a/src/main/java/org/opensearch/knn/index/KNNIndexShard.java +++ b/src/main/java/org/opensearch/knn/index/KNNIndexShard.java @@ -122,7 +122,8 @@ private Map getEnginePaths(IndexReader indexReader, KNNEngine for (FieldInfo fieldInfo : reader.getFieldInfos()) { if (fieldInfo.attributes().containsKey(KNNVectorFieldMapper.KNN_FIELD)) { - SpaceType spaceType = SpaceType.getSpace(fieldInfo.attributes().get(SPACE_TYPE)); + String spaceTypeName = fieldInfo.attributes().getOrDefault(SPACE_TYPE, SpaceType.L2.getValue()); + SpaceType spaceType = SpaceType.getSpace(spaceTypeName); String engineFileName = buildEngineFileName(reader.getSegmentInfo().info.name, knnEngine.getLatestBuildVersion(), fieldInfo.name, fileExtension); From 7e4113c3148a318cbcfabbd76b1839314c620c91 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Wed, 9 Feb 2022 14:46:30 -0800 Subject: [PATCH 2/4] [BUG FIX] Fix ef_search for nmslib warmup For nmslib, the ef_search parameter is configurable at load time. So, it needs to be passed as a parameter in both the search load phase as well as warmup. This commit adds it to the warmup phase and abstracts load parameters to a helper function so that it can be consistent for both search and warmup. Signed-off-by: John Mazanec --- .../org/opensearch/knn/index/IndexUtil.java | 19 +++++ .../opensearch/knn/index/KNNIndexShard.java | 4 +- .../org/opensearch/knn/index/KNNWeight.java | 16 +--- .../opensearch/knn/index/IndexUtilTests.java | 73 +++++++++++++++++++ 4 files changed, 97 insertions(+), 15 deletions(-) create mode 100644 src/test/java/org/opensearch/knn/index/IndexUtilTests.java diff --git a/src/main/java/org/opensearch/knn/index/IndexUtil.java b/src/main/java/org/opensearch/knn/index/IndexUtil.java index a80464cdb8..220017fe97 100644 --- a/src/main/java/org/opensearch/knn/index/IndexUtil.java +++ b/src/main/java/org/opensearch/knn/index/IndexUtil.java @@ -11,10 +11,13 @@ package org.opensearch.knn.index; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.MappingMetadata; import org.opensearch.common.ValidationException; import org.opensearch.knn.common.KNNConstants; +import org.opensearch.knn.index.util.KNNEngine; import org.opensearch.knn.indices.ModelDao; import org.opensearch.knn.indices.ModelMetadata; @@ -22,6 +25,8 @@ import java.util.Map; import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES; +import static org.opensearch.knn.common.KNNConstants.HNSW_ALGO_EF_SEARCH; +import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; public class IndexUtil { @@ -156,4 +161,18 @@ public static ValidationException validateKnnField(IndexMetadata indexMetadata, return null; } + + public static Map getLoadParameters(SpaceType spaceType, KNNEngine knnEngine, String indexName) { + Map loadParameters = Maps.newHashMap(ImmutableMap.of( + SPACE_TYPE, spaceType.getValue() + )); + + // For nmslib, we need to add the dynamic ef_search parameter that needs to be passed in when the + // hnsw graphs are loaded into memory + if (KNNEngine.NMSLIB.equals(knnEngine)) { + loadParameters.put(HNSW_ALGO_EF_SEARCH, KNNSettings.getEfSearchParam(indexName)); + } + + return loadParameters; + } } diff --git a/src/main/java/org/opensearch/knn/index/KNNIndexShard.java b/src/main/java/org/opensearch/knn/index/KNNIndexShard.java index 1476ab4bf3..e878721a46 100644 --- a/src/main/java/org/opensearch/knn/index/KNNIndexShard.java +++ b/src/main/java/org/opensearch/knn/index/KNNIndexShard.java @@ -5,7 +5,6 @@ package org.opensearch.knn.index; -import com.google.common.collect.ImmutableMap; import org.apache.lucene.index.FieldInfo; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -31,6 +30,7 @@ import java.util.stream.Collectors; import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; +import static org.opensearch.knn.index.IndexUtil.getLoadParameters; import static org.opensearch.knn.index.codec.util.KNNCodecUtil.buildEngineFileName; /** @@ -86,7 +86,7 @@ public void warmup() throws IOException { new NativeMemoryEntryContext.IndexEntryContext( key, NativeMemoryLoadStrategy.IndexLoadStrategy.getInstance(), - ImmutableMap.of(SPACE_TYPE, value.getValue()), + getLoadParameters(value, KNNEngine.getEngineNameFromPath(key), getIndexName()), getIndexName() ), true); } catch (ExecutionException ex) { diff --git a/src/main/java/org/opensearch/knn/index/KNNWeight.java b/src/main/java/org/opensearch/knn/index/KNNWeight.java index ebc5fa1629..c79d1acc8e 100644 --- a/src/main/java/org/opensearch/knn/index/KNNWeight.java +++ b/src/main/java/org/opensearch/knn/index/KNNWeight.java @@ -46,6 +46,7 @@ import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; import static org.opensearch.knn.common.KNNConstants.MODEL_ID; import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; +import static org.opensearch.knn.index.IndexUtil.getLoadParameters; import static org.opensearch.knn.plugin.stats.KNNCounter.GRAPH_QUERY_ERRORS; /** @@ -135,24 +136,13 @@ public Scorer scorer(LeafReaderContext context) throws IOException { KNNCounter.GRAPH_QUERY_REQUESTS.increment(); // We need to first get index allocation - NativeMemoryAllocation indexAllocation = null; - - Map loadParameters = new HashMap() {{ - put(SPACE_TYPE, spaceType.getValue()); - }}; - - // For nmslib, we set efSearch from an index setting. This has the advantage of being able to dynamically - // update this value, which we cannot do at the moment for mapping parameters. - if (knnEngine.equals(KNNEngine.NMSLIB)) { - loadParameters.put(KNNConstants.HNSW_ALGO_EF_SEARCH, KNNSettings.getEfSearchParam(knnQuery.getIndexName())); - } - + NativeMemoryAllocation indexAllocation; try { indexAllocation = nativeMemoryCacheManager.get( new NativeMemoryEntryContext.IndexEntryContext( indexPath.toString(), NativeMemoryLoadStrategy.IndexLoadStrategy.getInstance(), - loadParameters, + getLoadParameters(spaceType, knnEngine, knnQuery.getIndexName()), knnQuery.getIndexName() ), true); } catch (ExecutionException e) { diff --git a/src/test/java/org/opensearch/knn/index/IndexUtilTests.java b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java new file mode 100644 index 0000000000..9ec92edf5c --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java @@ -0,0 +1,73 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.index; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Settings; +import org.opensearch.knn.KNNTestCase; +import org.opensearch.knn.index.util.KNNEngine; + +import java.util.Map; + +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.opensearch.knn.common.KNNConstants.HNSW_ALGO_EF_SEARCH; +import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; +import static org.opensearch.knn.index.IndexUtil.getLoadParameters; +import static org.opensearch.knn.index.KNNSettings.KNN_ALGO_PARAM_EF_SEARCH; + +public class IndexUtilTests extends KNNTestCase { + public void testGetLoadParameters() { + // Test faiss to ensure that space type gets set properly + SpaceType spaceType1 = SpaceType.COSINESIMIL; + KNNEngine knnEngine1 = KNNEngine.FAISS; + String indexName = "my-test-index"; + + Map loadParameters = getLoadParameters(spaceType1, knnEngine1, indexName); + assertEquals(1, loadParameters.size()); + assertEquals(spaceType1.getValue(), loadParameters.get(SPACE_TYPE)); + + // Test nmslib to ensure both space type and ef search are properly set + SpaceType spaceType2 = SpaceType.L1; + KNNEngine knnEngine2 = KNNEngine.NMSLIB; + int efSearchValue = 413; + + // We use the constant for the setting here as opposed to the identifier of efSearch in nmslib jni + Map indexSettings = ImmutableMap.of( + KNN_ALGO_PARAM_EF_SEARCH, efSearchValue + ); + + // Because ef search comes from an index setting, we need to mock the long line of calls to get those + // index settings + Settings settings = Settings.builder().loadFromMap(indexSettings).build(); + IndexMetadata indexMetadata = mock(IndexMetadata.class); + when(indexMetadata.getSettings()).thenReturn(settings); + Metadata metadata = mock(Metadata.class); + when(metadata.index(anyString())).thenReturn(indexMetadata); + ClusterState clusterState = mock(ClusterState.class); + when(clusterState.getMetadata()).thenReturn(metadata); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.state()).thenReturn(clusterState); + KNNSettings.state().setClusterService(clusterService); + + loadParameters = getLoadParameters(spaceType2, knnEngine2, indexName); + assertEquals(2, loadParameters.size()); + assertEquals(spaceType2.getValue(), loadParameters.get(SPACE_TYPE)); + assertEquals(efSearchValue, loadParameters.get(HNSW_ALGO_EF_SEARCH)); + } +} From 769b263e2fd472c255187561270897cc8e813d3c Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Wed, 9 Feb 2022 15:11:55 -0800 Subject: [PATCH 3/4] Add javadocs and remove unused import Signed-off-by: John Mazanec --- src/main/java/org/opensearch/knn/index/IndexUtil.java | 8 ++++++++ .../java/org/opensearch/knn/index/IndexUtilTests.java | 1 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/opensearch/knn/index/IndexUtil.java b/src/main/java/org/opensearch/knn/index/IndexUtil.java index 220017fe97..3d2f6c7fe4 100644 --- a/src/main/java/org/opensearch/knn/index/IndexUtil.java +++ b/src/main/java/org/opensearch/knn/index/IndexUtil.java @@ -162,6 +162,14 @@ public static ValidationException validateKnnField(IndexMetadata indexMetadata, return null; } + /** + * Gets the load time parameters for a given engine. + * + * @param spaceType Space for this particular segment + * @param knnEngine Engine used for the native library indices being loaded in + * @param indexName Name of OpenSearch index that the segment files belong to + * @return load parameters that will be passed to the JNI. + */ public static Map getLoadParameters(SpaceType spaceType, KNNEngine knnEngine, String indexName) { Map loadParameters = Maps.newHashMap(ImmutableMap.of( SPACE_TYPE, spaceType.getValue() diff --git a/src/test/java/org/opensearch/knn/index/IndexUtilTests.java b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java index 9ec92edf5c..9e5db48b5d 100644 --- a/src/test/java/org/opensearch/knn/index/IndexUtilTests.java +++ b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java @@ -12,7 +12,6 @@ package org.opensearch.knn.index; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; From fd8bd5e5eb39f51f280e211e66a68f8f261c1621 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Wed, 9 Feb 2022 21:49:25 -0800 Subject: [PATCH 4/4] Minor refactoring Signed-off-by: John Mazanec --- src/main/java/org/opensearch/knn/index/IndexUtil.java | 5 +++-- src/main/java/org/opensearch/knn/index/KNNIndexShard.java | 6 ++++-- src/main/java/org/opensearch/knn/index/KNNWeight.java | 4 ++-- src/test/java/org/opensearch/knn/index/IndexUtilTests.java | 6 +++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/opensearch/knn/index/IndexUtil.java b/src/main/java/org/opensearch/knn/index/IndexUtil.java index 3d2f6c7fe4..1782d33846 100644 --- a/src/main/java/org/opensearch/knn/index/IndexUtil.java +++ b/src/main/java/org/opensearch/knn/index/IndexUtil.java @@ -22,6 +22,7 @@ import org.opensearch.knn.indices.ModelMetadata; import java.io.File; +import java.util.Collections; import java.util.Map; import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES; @@ -170,7 +171,7 @@ public static ValidationException validateKnnField(IndexMetadata indexMetadata, * @param indexName Name of OpenSearch index that the segment files belong to * @return load parameters that will be passed to the JNI. */ - public static Map getLoadParameters(SpaceType spaceType, KNNEngine knnEngine, String indexName) { + public static Map getParametersAtLoading(SpaceType spaceType, KNNEngine knnEngine, String indexName) { Map loadParameters = Maps.newHashMap(ImmutableMap.of( SPACE_TYPE, spaceType.getValue() )); @@ -181,6 +182,6 @@ public static Map getLoadParameters(SpaceType spaceType, KNNEngi loadParameters.put(HNSW_ALGO_EF_SEARCH, KNNSettings.getEfSearchParam(indexName)); } - return loadParameters; + return Collections.unmodifiableMap(loadParameters); } } diff --git a/src/main/java/org/opensearch/knn/index/KNNIndexShard.java b/src/main/java/org/opensearch/knn/index/KNNIndexShard.java index e878721a46..5448e88566 100644 --- a/src/main/java/org/opensearch/knn/index/KNNIndexShard.java +++ b/src/main/java/org/opensearch/knn/index/KNNIndexShard.java @@ -30,7 +30,7 @@ import java.util.stream.Collectors; import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; -import static org.opensearch.knn.index.IndexUtil.getLoadParameters; +import static org.opensearch.knn.index.IndexUtil.getParametersAtLoading; import static org.opensearch.knn.index.codec.util.KNNCodecUtil.buildEngineFileName; /** @@ -86,7 +86,7 @@ public void warmup() throws IOException { new NativeMemoryEntryContext.IndexEntryContext( key, NativeMemoryLoadStrategy.IndexLoadStrategy.getInstance(), - getLoadParameters(value, KNNEngine.getEngineNameFromPath(key), getIndexName()), + getParametersAtLoading(value, KNNEngine.getEngineNameFromPath(key), getIndexName()), getIndexName() ), true); } catch (ExecutionException ex) { @@ -122,6 +122,8 @@ private Map getEnginePaths(IndexReader indexReader, KNNEngine for (FieldInfo fieldInfo : reader.getFieldInfos()) { if (fieldInfo.attributes().containsKey(KNNVectorFieldMapper.KNN_FIELD)) { + // Space Type will not be present on ES versions 7.1 and 7.4 because the only available space type + // was L2. So, if Space Type is not present, just fall back to L2 String spaceTypeName = fieldInfo.attributes().getOrDefault(SPACE_TYPE, SpaceType.L2.getValue()); SpaceType spaceType = SpaceType.getSpace(spaceTypeName); String engineFileName = buildEngineFileName(reader.getSegmentInfo().info.name, diff --git a/src/main/java/org/opensearch/knn/index/KNNWeight.java b/src/main/java/org/opensearch/knn/index/KNNWeight.java index c79d1acc8e..6c8c41cb99 100644 --- a/src/main/java/org/opensearch/knn/index/KNNWeight.java +++ b/src/main/java/org/opensearch/knn/index/KNNWeight.java @@ -46,7 +46,7 @@ import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; import static org.opensearch.knn.common.KNNConstants.MODEL_ID; import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; -import static org.opensearch.knn.index.IndexUtil.getLoadParameters; +import static org.opensearch.knn.index.IndexUtil.getParametersAtLoading; import static org.opensearch.knn.plugin.stats.KNNCounter.GRAPH_QUERY_ERRORS; /** @@ -142,7 +142,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException { new NativeMemoryEntryContext.IndexEntryContext( indexPath.toString(), NativeMemoryLoadStrategy.IndexLoadStrategy.getInstance(), - getLoadParameters(spaceType, knnEngine, knnQuery.getIndexName()), + getParametersAtLoading(spaceType, knnEngine, knnQuery.getIndexName()), knnQuery.getIndexName() ), true); } catch (ExecutionException e) { diff --git a/src/test/java/org/opensearch/knn/index/IndexUtilTests.java b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java index 9e5db48b5d..e90e300f7d 100644 --- a/src/test/java/org/opensearch/knn/index/IndexUtilTests.java +++ b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java @@ -27,7 +27,7 @@ import static org.mockito.Mockito.when; import static org.opensearch.knn.common.KNNConstants.HNSW_ALGO_EF_SEARCH; import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; -import static org.opensearch.knn.index.IndexUtil.getLoadParameters; +import static org.opensearch.knn.index.IndexUtil.getParametersAtLoading; import static org.opensearch.knn.index.KNNSettings.KNN_ALGO_PARAM_EF_SEARCH; public class IndexUtilTests extends KNNTestCase { @@ -37,7 +37,7 @@ public void testGetLoadParameters() { KNNEngine knnEngine1 = KNNEngine.FAISS; String indexName = "my-test-index"; - Map loadParameters = getLoadParameters(spaceType1, knnEngine1, indexName); + Map loadParameters = getParametersAtLoading(spaceType1, knnEngine1, indexName); assertEquals(1, loadParameters.size()); assertEquals(spaceType1.getValue(), loadParameters.get(SPACE_TYPE)); @@ -64,7 +64,7 @@ public void testGetLoadParameters() { when(clusterService.state()).thenReturn(clusterState); KNNSettings.state().setClusterService(clusterService); - loadParameters = getLoadParameters(spaceType2, knnEngine2, indexName); + loadParameters = getParametersAtLoading(spaceType2, knnEngine2, indexName); assertEquals(2, loadParameters.size()); assertEquals(spaceType2.getValue(), loadParameters.get(SPACE_TYPE)); assertEquals(efSearchValue, loadParameters.get(HNSW_ALGO_EF_SEARCH));