diff --git a/.evergreen-functions.yml b/.evergreen-functions.yml
index daac22d9a5..3d9caeaeb7 100644
--- a/.evergreen-functions.yml
+++ b/.evergreen-functions.yml
@@ -50,6 +50,8 @@ variables:
       - VERSION_UPGRADE_HOOK_VERSION
       - BUILD_SCENARIO
       - MDB_BASH_DEBUG
+      - AI_MONGODB_EMBEDDING_INDEXING_KEY
+      - AI_MONGODB_EMBEDDING_QUERY_KEY
 
 functions:
 
@@ -584,6 +586,8 @@ functions:
           - github_pr_number
           - project_identifier
           - revision_order_id
+          - AI_MONGODB_EMBEDDING_INDEXING_KEY
+          - AI_MONGODB_EMBEDDING_QUERY_KEY
         add_to_path:
           - ${workdir}/bin
         binary: scripts/evergreen/e2e/e2e.sh
diff --git a/.evergreen-tasks.yml b/.evergreen-tasks.yml
index cae1ce3dab..fe9e0e2967 100644
--- a/.evergreen-tasks.yml
+++ b/.evergreen-tasks.yml
@@ -1323,6 +1323,11 @@ tasks:
     commands:
       - func: "e2e_test"
 
+  - name: e2e_search_community_auto_embedding
+    tags: ["patch-run"]
+    commands:
+      - func: "e2e_test"
+
   - name: e2e_search_community_tls
     tags: ["patch-run"]
     commands:
diff --git a/.evergreen.yml b/.evergreen.yml
index b0a44f065f..505e251e7c 100644
--- a/.evergreen.yml
+++ b/.evergreen.yml
@@ -663,6 +663,7 @@ task_groups:
     tasks:
       - e2e_community_replicaset_scale
       - e2e_search_community_basic
+      - e2e_search_community_auto_embedding
       - e2e_search_community_tls
       - e2e_search_external_basic
       - e2e_search_external_tls
diff --git a/docker/mongodb-kubernetes-tests/tests/common/search/movies_search_helper.py b/docker/mongodb-kubernetes-tests/tests/common/search/movies_search_helper.py
index be21511584..b639b3f6e7 100644
--- a/docker/mongodb-kubernetes-tests/tests/common/search/movies_search_helper.py
+++ b/docker/mongodb-kubernetes-tests/tests/common/search/movies_search_helper.py
@@ -27,6 +27,9 @@ def restore_sample_database(self):
     def create_search_index(self):
         self.search_tester.create_search_index(self.db_name, self.col_name)
 
+    def create_auto_embedding_vector_search_index(self):
+        self.search_tester.create_auto_embedding_vector_search_index(self.db_name, self.col_name)
+
     def wait_for_search_indexes(self):
         self.search_tester.wait_for_search_indexes_ready(self.db_name, self.col_name)
 
@@ -75,3 +78,55 @@ def execute_example_search_query(self):
                 {"$project": {"title": 1, "plot": 1, "genres": 1, "_id": 0}},
             ]
         )
+
+    def execute_auto_embedding_vector_search_query(self, query: str = "spy thriller", limit: int = 10):
+        """Run a $vectorSearch aggregation that passes `query` as text to the auto-embedding index."""
+        return self.search_tester.client[self.db_name][self.col_name].aggregate(
+            [
+                {
+                    "$vectorSearch": {
+                        "index": "vector_auto_embed_index",
+                        "path": "plot",
+                        "query": query,
+                        "numCandidates": 150,
+                        "limit": limit,
+                    }
+                },
+                {
+                    "$project": {
+                        "_id": 0,
+                        "plot": 1,
+                        "title": 1,
+                        "score": {"$meta": "vectorSearchScore"},
+                    }
+                },
+            ]
+        )
+
+    def assert_auto_emb_vector_search_query(self, retry_timeout: int = 1):
+        """Poll the auto-embedding vector search until it returns 10 documents or `retry_timeout` elapses."""
+
+        def wait_for_auto_emb_search_results():
+            exp_document_count = 10
+            count = 0
+            status_msg = ""
+            try:
+                result = self.execute_auto_embedding_vector_search_query(limit=exp_document_count)
+                status_msg = f"{self.db_name}/{self.col_name}: auto-embedding vector search query results:\n"
+                for r in result:
+                    status_msg += f"{r}\n"
+                    count += 1
+                status_msg += f"Count: {count}"
+            except pymongo.errors.PyMongoError as e:
+                # Return the error as the status message so a timeout shows why the query never succeeded.
+                status_msg = f"{self.db_name}/{self.col_name}: auto-embedding vector search query failed: {e}"
+
+            logger.debug(status_msg)
+            return count == exp_document_count, status_msg
+
+        kubetester.run_periodically(
+            fn=wait_for_auto_emb_search_results,
+            timeout=retry_timeout,
+            sleep_time=1,
+            msg="Auto-embedding vector search query to return correct data",
+        )
diff --git a/docker/mongodb-kubernetes-tests/tests/common/search/search_tester.py b/docker/mongodb-kubernetes-tests/tests/common/search/search_tester.py
index 6dccfb5aeb..b32fa86264 100644
--- a/docker/mongodb-kubernetes-tests/tests/common/search/search_tester.py
+++ b/docker/mongodb-kubernetes-tests/tests/common/search/search_tester.py
@@ -45,6 +45,34 @@ def create_search_index(self, database_name: str, collection_name: str):
         result = collection.create_search_index(model=search_index_model)
         logger.debug(f"create_search_index result: {result}")
 
+    def create_auto_embedding_vector_search_index(
+        self,
+        database_name: str,
+        collection_name: str,
+        index_name: str = "vector_auto_embed_index",
+        field_path: str = "plot",
+        model: str = "voyage-4",
+    ):
+        """Create a vectorSearch index with one text "autoEmbed" field on `field_path` using `model`."""
+        database = self.client[database_name]
+        collection = database[collection_name]
+        search_index_model = SearchIndexModel(
+            definition={
+                "fields": [
+                    {
+                        "type": "autoEmbed",
+                        "modality": "text",
+                        "path": field_path,
+                        "model": model,
+                    }
+                ]
+            },
+            type="vectorSearch",
+            name=index_name,
+        )
+        result = collection.create_search_index(model=search_index_model)
+        logger.debug(f"create_auto_embedding_vector_search_index result: {result}")
+
     def wait_for_search_indexes_ready(self, database_name: str, collection_name: str, timeout=60):
         kubetester.run_periodically(
             fn=lambda: self.search_indexes_ready(database_name, collection_name),
diff --git a/docker/mongodb-kubernetes-tests/tests/search/search_community_auto_embedding.py b/docker/mongodb-kubernetes-tests/tests/search/search_community_auto_embedding.py
new file mode 100644
index 0000000000..3cad36cbac
--- /dev/null
+++ b/docker/mongodb-kubernetes-tests/tests/search/search_community_auto_embedding.py
@@ -0,0 +1,144 @@
+"""E2E test for MongoDBSearch auto-embedding vector search on a MongoDBCommunity replica set."""
+
+import os
+from urllib.parse import quote_plus
+
+from kubetester import create_or_update_secret, try_load
+from kubetester.kubetester import fixture as yaml_fixture
+from kubetester.mongodb_community import MongoDBCommunity
+from kubetester.mongodb_search import MongoDBSearch
+from kubetester.phase import Phase
+from pytest import fixture, mark
+from tests import test_logger
+from tests.common.search import movies_search_helper
+from tests.common.search.movies_search_helper import SampleMoviesSearchHelper
+from tests.common.search.search_tester import SearchTester
+from tests.conftest import get_default_operator
+
+logger = test_logger.get_test_logger(__name__)
+
+ADMIN_USER_NAME = "mdb-admin-user"
+ADMIN_USER_PASSWORD = "mdb-admin-user-pass"
+
+MONGOT_USER_NAME = "search-sync-source"
+MONGOT_USER_PASSWORD = "search-sync-source-user-password"
+
+USER_NAME = "mdb-user"
+USER_PASSWORD = "mdb-user-pass"
+
+MDBC_RESOURCE_NAME = "mdbc-rs"
+# Names of the environment variables the embedding provider API keys are read from.
+EMBEDDING_INDEXING_KEY_ENV_VAR = "AI_MONGODB_EMBEDDING_INDEXING_KEY"
+EMBEDDING_QUERY_KEY_ENV_VAR = "AI_MONGODB_EMBEDDING_QUERY_KEY"
+VOYAGE_API_KEY_SECRET_NAME = "voyage-api-keys"
+PROVIDER_ENDPOINT = "https://ai.mongodb.com/v1/embeddings"
+
+
+@fixture(scope="function")
+def mdbc(namespace: str) -> MongoDBCommunity:
+    """Return the MongoDBCommunity resource built from the sample-mflix replica set fixture."""
+    resource = MongoDBCommunity.from_yaml(
+        yaml_fixture("community-replicaset-sample-mflix.yaml"),
+        name=MDBC_RESOURCE_NAME,
+        namespace=namespace,
+    )
+
+    # try_load()'s result is irrelevant here: the resource is returned either way.
+    try_load(resource)
+    return resource
+
+
+@fixture(scope="function")
+def mdbs(namespace: str) -> MongoDBSearch:
+    """Return the MongoDBSearch resource built from the minimal search fixture."""
+    resource = MongoDBSearch.from_yaml(
+        yaml_fixture("search-minimal.yaml"),
+        namespace=namespace,
+    )
+
+    # try_load()'s result is irrelevant here: the resource is returned either way.
+    try_load(resource)
+    return resource
+
+
+@mark.e2e_search_community_auto_embedding
+def test_install_operator(namespace: str, operator_installation_config: dict[str, str]):
+    operator = get_default_operator(namespace, operator_installation_config=operator_installation_config)
+    operator.assert_is_running()
+
+
+@mark.e2e_search_community_auto_embedding
+def test_install_secrets(namespace: str, mdbs: MongoDBSearch):
+    """Create the user password secrets and the secret holding the embedding API keys."""
+    create_or_update_secret(namespace=namespace, name=f"{USER_NAME}-password", data={"password": USER_PASSWORD})
+    create_or_update_secret(
+        namespace=namespace, name=f"{ADMIN_USER_NAME}-password", data={"password": ADMIN_USER_PASSWORD}
+    )
+    create_or_update_secret(
+        namespace=namespace, name=f"{mdbs.name}-{MONGOT_USER_NAME}-password", data={"password": MONGOT_USER_PASSWORD}
+    )
+
+    indexing_key = os.getenv(EMBEDDING_INDEXING_KEY_ENV_VAR)
+    query_key = os.getenv(EMBEDDING_QUERY_KEY_ENV_VAR)
+    # Empty values count as missing: the test-app chart defaults both keys to "".
+    if not indexing_key or not query_key:
+        raise ValueError(
+            f"Missing required environment variables: {EMBEDDING_INDEXING_KEY_ENV_VAR} and/or {EMBEDDING_QUERY_KEY_ENV_VAR}"
+        )
+    create_or_update_secret(
+        namespace=namespace,
+        name=VOYAGE_API_KEY_SECRET_NAME,
+        data={"query-key": query_key, "indexing-key": indexing_key},
+    )
+
+
+@mark.e2e_search_community_auto_embedding
+def test_create_database_resource(mdbc: MongoDBCommunity):
+    mdbc.update()
+    mdbc.assert_reaches_phase(Phase.Running, timeout=300)
+
+
+@mark.e2e_search_community_auto_embedding
+def test_create_search_resource(mdbs: MongoDBSearch):
+    """Enable auto-embedding on the search resource using the API key secret and provider endpoint."""
+    mdbs["spec"]["autoEmbedding"] = {
+        "embeddingModelAPIKeySecret": {"name": VOYAGE_API_KEY_SECRET_NAME},
+        "providerEndpoint": PROVIDER_ENDPOINT,
+    }
+    mdbs.update()
+    mdbs.assert_reaches_phase(Phase.Running, timeout=300)
+
+
+@mark.e2e_search_community_auto_embedding
+def test_wait_for_community_resource_ready(mdbc: MongoDBCommunity):
+    mdbc.assert_reaches_phase(Phase.Running, timeout=300)
+
+
+@fixture(scope="function")
+def sample_movies_helper(mdbc: MongoDBCommunity) -> SampleMoviesSearchHelper:
+    return movies_search_helper.SampleMoviesSearchHelper(
+        SearchTester(get_connection_string(mdbc, USER_NAME, USER_PASSWORD))
+    )
+
+
+@mark.e2e_search_community_auto_embedding
+def test_search_restore_sample_database(sample_movies_helper: SampleMoviesSearchHelper):
+    sample_movies_helper.restore_sample_database()
+
+
+@mark.e2e_search_community_auto_embedding
+def test_search_create_search_index(sample_movies_helper: SampleMoviesSearchHelper):
+    sample_movies_helper.create_auto_embedding_vector_search_index()
+
+
+@mark.e2e_search_community_auto_embedding
+def test_search_assert_search_query(sample_movies_helper: SampleMoviesSearchHelper):
+    sample_movies_helper.assert_auto_emb_vector_search_query(retry_timeout=90)
+
+
+def get_connection_string(mdbc: MongoDBCommunity, user_name: str, user_password: str) -> str:
+    """Build the MongoDB URI for member 0 of the replica set, percent-escaping the credentials."""
+    # Credentials in a MongoDB URI must be percent-escaped, otherwise characters such as "@" or ":" break parsing.
+    credentials = f"{quote_plus(user_name)}:{quote_plus(user_password)}"
+    host = f"{mdbc.name}-0.{mdbc.name}-svc.{mdbc.namespace}.svc.cluster.local:27017"
+    return f"mongodb://{credentials}@{host}/?replicaSet={mdbc.name}"
diff --git a/scripts/evergreen/deployments/test-app/templates/mongodb-enterprise-tests.yaml b/scripts/evergreen/deployments/test-app/templates/mongodb-enterprise-tests.yaml
index 78194ae510..5a484b361d 100644
--- a/scripts/evergreen/deployments/test-app/templates/mongodb-enterprise-tests.yaml
+++ b/scripts/evergreen/deployments/test-app/templates/mongodb-enterprise-tests.yaml
@@ -198,6 +198,10 @@ spec:
       value: "{{ .Values.cognito_workload_url }}"
     - name: cognito_workload_user_id
       value: "{{ .Values.cognito_workload_user_id }}"
+    - name: AI_MONGODB_EMBEDDING_INDEXING_KEY
+      value: "{{ .Values.autoEmbedding.providerMongoDB.indexingKey }}"
+    - name: AI_MONGODB_EMBEDDING_QUERY_KEY
+      value: "{{ .Values.autoEmbedding.providerMongoDB.queryKey }}"
     image: "{{ .Values.mekoTestsRegistry }}/mongodb-kubernetes-tests:{{ .Values.mekoTestsVersion }}"
     # Options to pytest command should go in the pytest.ini file.
     command: ["pytest"]
diff --git a/scripts/evergreen/deployments/test-app/values.yaml b/scripts/evergreen/deployments/test-app/values.yaml
index 440dc56d43..6ada16e0e7 100644
--- a/scripts/evergreen/deployments/test-app/values.yaml
+++ b/scripts/evergreen/deployments/test-app/values.yaml
@@ -51,3 +51,8 @@ helm:
     registry: ""
     repository: ""
     region: ""
+
+autoEmbedding:
+  providerMongoDB:
+    indexingKey: ""
+    queryKey: ""
diff --git a/scripts/evergreen/e2e/single_e2e.sh b/scripts/evergreen/e2e/single_e2e.sh
index b43cb9205b..cfd845524b 100755
--- a/scripts/evergreen/e2e/single_e2e.sh
+++ b/scripts/evergreen/e2e/single_e2e.sh
@@ -89,6 +89,8 @@ deploy_test_app() {
         "--set" "helm.oci.registry=${helm_oci_registry}"
         "--set" "helm.oci.repository=${helm_oci_repository}"
         "--set" "helm.oci.region=${helm_oci_registry_region}"
+        "--set" "autoEmbedding.providerMongoDB.indexingKey=${AI_MONGODB_EMBEDDING_INDEXING_KEY:-}"
+        "--set" "autoEmbedding.providerMongoDB.queryKey=${AI_MONGODB_EMBEDDING_QUERY_KEY:-}"
     )
 
     # shellcheck disable=SC2154