diff --git a/docs/en/cookbook/vector-search.rst b/docs/en/cookbook/vector-search.rst new file mode 100644 index 0000000000..e952ddcfea --- /dev/null +++ b/docs/en/cookbook/vector-search.rst @@ -0,0 +1,186 @@ +Vector Search Cookbook +====================== + +This tutorial demonstrates how to use `MongoDB Atlas Vector Search`_ with Doctrine +MongoDB ODM. Vector search enables semantic queries over vector embeddings, +which are typically generated by an embedding system. + +Step 1: Generate Vector Embeddings +---------------------------------- + +Before storing data, you need to generate vector embeddings for your documents. +You can use an embedding system such as Symfony AI to convert text or other +data into a ``float[]`` vector. + +Example using `Voyage AI`_ and `Symfony AI`_: + +.. code-block:: php + + use Symfony\AI\Platform\Bridge\Voyage\PlatformFactory; + + $platform = PlatformFactory::create(getenv('VOYAGE_API_KEY')); + $vectors = $platform->invoke('voyage-3', <<<'TEXT' + Once upon a time, there was a country called Japan. It was a beautiful country with a lot of mountains and rivers. + The people of Japan were very kind and hardworking. They loved their country very much and took care of it. The + country was very peaceful and prosperous. The people lived happily ever after. + TEXT)->asVectors(); + +Step 2: Define the Model +------------------------ + +Annotate your document with the :doc:`#[VectorSearchIndex] attribute <../reference/attributes-reference#vector_search_index>` +and define a vector field of type ``float[]``. +The number of dimensions must match the embedding vector size (e.g., 1024). +The similarity metric can be either cosine, euclidean or dotProduct; they all +return the same ranking because Voyage AI normalizes its vectors to length 1. + +.. 
code-block:: php + + use Doctrine\ODM\MongoDB\Mapping\Annotations as ODM; + use Doctrine\ODM\MongoDB\Mapping\ClassMetadata; + use Doctrine\ODM\MongoDB\Types\Type; + use Symfony\AI\Platform\Vector\Vector; + + #[ODM\Document] + #[ODM\VectorSearchIndex( + fields: [ + [ + 'type' => 'vector', + 'path' => 'voyage3Vector', + 'numDimensions' => 1024, + 'similarity' => ClassMetadata::VECTOR_SIMILARITY_DOT_PRODUCT, + ], + [ + 'type' => 'filter', + 'path' => 'published', + ], + ], + name: 'default', + )] + class Guide + { + #[ODM\Id] + public ?string $id = null; + + #[ODM\Field] + public bool $published = false; + + #[ODM\Field] + public ?string $content = null; + + /** @var list<float>|null Embedding vector created from $content */ + #[ODM\Field(type: Type::COLLECTION)] + public ?array $voyage3Vector = null; + + /** @param list<float>|Vector $vector */ + public function setVoyage3Vector(array|Vector $vector): void + { + if ($vector instanceof Vector) { + if ($vector->getDimensions() !== 1024) { + throw new InvalidArgumentException('The embedding vector must have 1024 dimensions.'); + } + + $vector = $vector->getData(); + } + + if (count($vector) !== 1024) { + throw new InvalidArgumentException('The embedding vector must have 1024 dimensions.'); + } + + $this->voyage3Vector = $vector; + } + } + +Step 3: Create the Collection and Insert Documents +-------------------------------------------------- + +Use the ``SchemaManager`` to create the collection, then insert the documents that will hold the vector embeddings. + +.. code-block:: php + + $schemaManager = $dm->getSchemaManager(); + $schemaManager->createDocumentCollection(Guide::class); + +Insert documents: + +.. code-block:: php + + $doc1 = new Guide(); + $doc1->published = true; + $doc1->content = 'First document'; + + $doc2 = new Guide(); + $doc2->published = false; + + $dm->persist($doc1); + $dm->persist($doc2); + $dm->flush(); + +The vector values of each document can be set later, typically using an asynchronous process: + +.. 
code-block:: php + + $vector1 = $embeddingPlatform->invoke($doc1->content)->asVectors()[0]; + $doc1->setVoyage3Vector($vector1); + + $vector2 = $embeddingPlatform->invoke($doc2->content)->asVectors()[0]; + $doc2->setVoyage3Vector($vector2); + + $dm->flush(); + + +Step 4: Create the Vector Search Index +-------------------------------------- + +When updating documents, the vector search index is asynchronously updated by +MongoDB Atlas. You have to wait a few seconds before the changes are reflected +in search results. + +.. code-block:: php + + $schemaManager->createDocumentSearchIndexes(Guide::class); + + +If the vector search index is created after inserting documents, the index is +marked as "READY" when all existing documents are indexed. You can wait for +the index to be ready using the following code: + +.. code-block:: php + + $schemaManager->waitForSearchIndexes([Guide::class]); + +Step 5: Run a Vector Search Aggregation +--------------------------------------- + +Use the aggregation builder to run a vector search query: + +.. code-block:: php + + $results = $dm->createAggregationBuilder(Guide::class) + ->vectorSearch() + ->index('default') + ->path('voyage3Vector') + ->queryVector($vector) + ->filter($qb->expr()->field('published')->equals(true)) + ->numCandidates(10) + ->limit(10) + ->set() + ->field('score') + ->expression(['$meta' => 'vectorSearchScore']) + ->getAggregation()->execute()->toArray(); + + var_dump($results); + +Notes +----- +- Vector embeddings should be generated using a reliable embedding system. +- The vector field must be of type ``float[]``, ``int[]`` or ``bool[]``; it + must match the embedding vector type and dimensions. +- The ``#[VectorSearchIndex]`` attribute configures the index for vector search. +- Use the aggregation builder's ``vectorSearch`` stage to query for similar vectors. +- Doctrine ODM 2.13+ is required for vector search support. + + +.. _`MongoDB Atlas Vector Search`: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/ +.. _`Voyage AI`: https://www.voyageai.com/ +.. 
_`Symfony AI`: https://symfony.com/ai diff --git a/docs/en/reference/aggregation-stage-reference.rst b/docs/en/reference/aggregation-stage-reference.rst index b500868067..8e7aebf613 100644 --- a/docs/en/reference/aggregation-stage-reference.rst +++ b/docs/en/reference/aggregation-stage-reference.rst @@ -694,6 +694,11 @@ number of available operators, please refer to the `MongoDB documentation `_ for a reference of all available operators. +.. note:: + A `Search index `_ + is required for this stage. See the :doc:`#[SearchIndex] attribute <../reference/attributes-reference#search_index>` + for details on how to define it. + .. code-block:: php `_ + is required for this stage. See the :doc:`#[VectorSearchIndex] attribute <../reference/attributes-reference#vector_search_index>` + for details on how to define it. .. code-block:: php