-
-
Notifications
You must be signed in to change notification settings - Fork 515
Validate filter and queryVector parameter in $vectorSearch stage builder
#2857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,10 +8,15 @@ | |
| use Doctrine\ODM\MongoDB\Aggregation\Stage; | ||
| use Doctrine\ODM\MongoDB\Persisters\DocumentPersister; | ||
| use Doctrine\ODM\MongoDB\Query\Expr; | ||
| use InvalidArgumentException; | ||
| use MongoDB\BSON\Binary; | ||
| use MongoDB\BSON\Decimal128; | ||
| use MongoDB\BSON\Int64; | ||
|
|
||
| use function array_is_list; | ||
| use function is_array; | ||
| use function sprintf; | ||
|
|
||
| /** | ||
| * @phpstan-type Vector list<int|Int64>|list<float|Decimal128>|list<bool|0|1>|Binary | ||
| * @phpstan-type VectorSearchStageExpression array{ | ||
|
|
@@ -28,12 +33,15 @@ | |
| */ | ||
| class VectorSearch extends Stage | ||
| { | ||
| private ?bool $exact = null; | ||
| private ?Expr $filter = null; | ||
| private ?string $index = null; | ||
| private ?int $limit = null; | ||
| private ?int $numCandidates = null; | ||
| private ?string $path = null; | ||
| /** @see Binary::TYPE_VECTOR introduced in ext-mongodb 2.2 */ | ||
| private const BINARY_TYPE_VECTOR = 9; | ||
|
|
||
| private ?bool $exact = null; | ||
|
GromNaN marked this conversation as resolved.
|
||
| private array|Expr|null $filter = null; | ||
| private ?string $index = null; | ||
| private ?int $limit = null; | ||
| private ?int $numCandidates = null; | ||
| private ?string $path = null; | ||
| /** @phpstan-var Vector|null */ | ||
| private array|Binary|null $queryVector = null; | ||
|
|
||
|
|
@@ -50,8 +58,10 @@ public function getExpression(): array | |
| $params['exact'] = $this->exact; | ||
| } | ||
|
|
||
| if ($this->filter !== null) { | ||
| if ($this->filter instanceof Expr) { | ||
| $params['filter'] = $this->filter->getQuery(); | ||
| } elseif (is_array($this->filter)) { | ||
| $params['filter'] = $this->filter; | ||
| } | ||
|
|
||
| if ($this->index !== null) { | ||
|
|
@@ -84,7 +94,8 @@ public function exact(bool $exact): static | |
| return $this; | ||
| } | ||
|
|
||
| public function filter(Expr $filter): static | ||
| /** @phpstan-param array<string, mixed>|Expr $filter */ | ||
| public function filter(array|Expr $filter): static | ||
| { | ||
| $this->filter = $filter; | ||
|
|
||
|
|
@@ -122,6 +133,18 @@ public function path(string $path): static | |
| /** @phpstan-param Vector $queryVector */ | ||
| public function queryVector(array|Binary $queryVector): static | ||
| { | ||
| if ($queryVector === []) { | ||
| throw new InvalidArgumentException('Query vector cannot be an empty array.'); | ||
| } | ||
|
|
||
| if (is_array($queryVector) && ! array_is_list($queryVector)) { | ||
| throw new InvalidArgumentException('Query vector must be a list of numbers, got an associative array.'); | ||
|
GromNaN marked this conversation as resolved.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you actually intend to validate the contents of the list, or will you rely on the server to report that error?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is a partial validation. I don't know what vector type is expected so iterating over the values would just cost time. |
||
| } | ||
|
|
||
| if ($queryVector instanceof Binary && $queryVector->getType() !== self::BINARY_TYPE_VECTOR) { | ||
| throw new InvalidArgumentException(sprintf('Binary query vector must be of type 9 (Vector), got %d.', $queryVector->getType())); | ||
| } | ||
|
|
||
| $this->queryVector = $queryVector; | ||
|
|
||
| return $this; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,8 +10,10 @@ | |
| use Doctrine\ODM\MongoDB\Tests\BaseTestCase; | ||
| use Documents\User; | ||
| use Documents\VectorEmbedding; | ||
| use InvalidArgumentException; | ||
| use MongoDB\BSON\Binary; | ||
| use MongoDB\BSON\VectorType; | ||
| use PHPUnit\Framework\Attributes\TestWith; | ||
|
|
||
| use function enum_exists; | ||
|
|
||
|
|
@@ -27,12 +29,19 @@ public function testEmptyStage(): void | |
|
|
||
| public function testExact(): void | ||
| { | ||
| [$stage, $builder] = $this->createVectorSearchStage(); | ||
| [$stage] = $this->createVectorSearchStage(); | ||
| $stage->exact(true); | ||
| self::assertSame(['$vectorSearch' => ['exact' => true]], $stage->getExpression()); | ||
| } | ||
|
|
||
| public function testFilter(): void | ||
| public function testFilterArray(): void | ||
| { | ||
| [$stage] = $this->createVectorSearchStage(); | ||
| $stage->filter(['status' => ['$ne' => 'inactive']]); | ||
| self::assertSame(['$vectorSearch' => ['filter' => ['status' => ['$ne' => 'inactive']]]], $stage->getExpression()); | ||
| } | ||
|
|
||
| public function testFilterExpr(): void | ||
| { | ||
| [$stage, $builder] = $this->createVectorSearchStage(); | ||
| $stage->filter($builder->matchExpr()->field('status')->notEqual('inactive')); | ||
|
|
@@ -97,6 +106,17 @@ public function testQueryVectorAcceptsBinary(): void | |
| self::assertSame(['$vectorSearch' => ['queryVector' => $binaryVector]], $stage->getExpression()); | ||
| } | ||
|
|
||
| #[TestWith([new Binary("\x03\x00\x01\x02\x03", Binary::TYPE_GENERIC), 'Binary query vector must be of type 9 (Vector), got 0.'])] | ||
| #[TestWith([[1 => 1, 2 => 3], 'Query vector must be a list of numbers, got an associative array.'])] | ||
| #[TestWith([[], 'Query vector cannot be an empty array.'])] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh cool, I had not seen these attributes before.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before attributes, it was the |
||
| public function testQueryVectorInvalidType(mixed $queryVector, string $message): void | ||
| { | ||
| [$stage] = $this->createVectorSearchStage(); | ||
| $this->expectException(InvalidArgumentException::class); | ||
| $this->expectExceptionMessage($message); | ||
| $stage->queryVector($queryVector); | ||
| } | ||
|
|
||
| public function testChainingAllOptions(): void | ||
| { | ||
| [$stage, $builder] = $this->createVectorSearchStage(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to confirm, will this stick around even after the ext-mongodb 2.2.0 release? I assume you don't want to raise the driver dependency just for this feature.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Exactly, we don't want to update the required version for this feature. But someone could receive a binary vector from the server and use in a query. That's why we have this constant duplicated here.