diff --git a/src/Illuminate/Database/Query/Builder.php b/src/Illuminate/Database/Query/Builder.php index b98159453ad4..ecfda18f8629 100755 --- a/src/Illuminate/Database/Query/Builder.php +++ b/src/Illuminate/Database/Query/Builder.php @@ -16,6 +16,7 @@ use Illuminate\Database\ConnectionInterface; use Illuminate\Database\Eloquent\Builder as EloquentBuilder; use Illuminate\Database\Eloquent\Relations\Relation; +use Illuminate\Database\PostgresConnection; use Illuminate\Database\Query\Grammars\Grammar; use Illuminate\Database\Query\Processors\Processor; use Illuminate\Pagination\Paginator; @@ -458,6 +459,39 @@ public function addSelect($column) return $this; } + /** + * Add a vector-distance selection (the "<=>" operator against the given vector) to the query. + * + * @param \Illuminate\Contracts\Database\Query\Expression|string $column + * @param \Illuminate\Support\Collection|\Illuminate\Contracts\Support\Arrayable|array|string $vector A vector, or a string that is converted to embeddings first. + * @param string|null $as Alias for the selected distance; defaults to the column name suffixed with "_distance". + * @return $this + */ + public function selectVectorDistance($column, $vector, $as = null) + { + $this->ensureConnectionSupportsVectors(); + + if (is_string($vector)) { + $vector = Str::of($vector)->toEmbeddings(); + } + + $this->addBinding( + json_encode( + $vector instanceof Arrayable + ? $vector->toArray() + : $vector, + flags: JSON_THROW_ON_ERROR + ), + 'select', + ); + + $as = $this->getGrammar()->wrap($as ?? $column.'_distance'); + + return $this->addSelect( + new Expression("({$this->getGrammar()->wrap($column)} <=> ?) as {$as}") + ); + } + /** * Force the query to only return distinct results. * @@ -1098,6 +1132,75 @@ public function orWhereColumn($first, $operator = null, $second = null) return $this->whereColumn($first, $operator, $second, 'or'); } + /** + * Add a vector similarity clause to the query, filtering by minimum similarity and ordering by similarity.
+ * + * @param \Illuminate\Contracts\Database\Query\Expression|string $column + * @param \Illuminate\Support\Collection|\Illuminate\Contracts\Support\Arrayable|array|string $vector + * @param float $minSimilarity A value between 0.0 and 1.0, where 1.0 is identical. + * @param bool $order Whether to also order the results by vector distance. + * @return $this + */ + public function whereVectorSimilarTo($column, $vector, $minSimilarity = 0.6, $order = true) + { + if (is_string($vector)) { + $vector = Str::of($vector)->toEmbeddings(); + } + + $this->whereVectorDistanceLessThan($column, $vector, 1 - $minSimilarity); + + if ($order) { + $this->orderByVectorDistance($column, $vector); + } + + return $this; + } + + /** + * Add a vector distance "where" clause to the query. + * + * @param \Illuminate\Contracts\Database\Query\Expression|string $column + * @param \Illuminate\Support\Collection|\Illuminate\Contracts\Support\Arrayable|array|string $vector A vector, or a string that is converted to embeddings first. + * @param float $maxDistance Rows match when the "<=>" distance is less than or equal to this value. + * @param string $boolean + * @return $this + */ + public function whereVectorDistanceLessThan($column, $vector, $maxDistance, $boolean = 'and') + { + $this->ensureConnectionSupportsVectors(); + + if (is_string($vector)) { + $vector = Str::of($vector)->toEmbeddings(); + } + + return $this->whereRaw( + "({$this->getGrammar()->wrap($column)} <=> ?) <= ?", + [ + json_encode( + $vector instanceof Arrayable + ? $vector->toArray() + : $vector, + flags: JSON_THROW_ON_ERROR + ), + $maxDistance, + ], + $boolean + ); + } + + /** + * Add a vector distance "or where" clause to the query. + * + * @param \Illuminate\Contracts\Database\Query\Expression|string $column + * @param \Illuminate\Support\Collection|\Illuminate\Contracts\Support\Arrayable|array|string $vector + * @param float $maxDistance + * @return $this + */ + public function orWhereVectorDistanceLessThan($column, $vector, $maxDistance) + { + return $this->whereVectorDistanceLessThan($column, $vector, $maxDistance, 'or'); + } + /** * Add a raw "where" clause to the query.
* @@ -2781,6 +2884,39 @@ public function oldest($column = 'created_at') return $this->orderBy($column, 'asc'); } + /** + * Add a vector-distance "order by" clause (ascending, using the "<=>" operator) to the query. + * + * @param \Illuminate\Contracts\Database\Query\Expression|string $column + * @param \Illuminate\Support\Collection|\Illuminate\Contracts\Support\Arrayable|array|string $vector + * @return $this + */ + public function orderByVectorDistance($column, $vector) + { + $this->ensureConnectionSupportsVectors(); + + if (is_string($vector)) { + $vector = Str::of($vector)->toEmbeddings(); + } + + $this->addBinding( + json_encode( + $vector instanceof Arrayable + ? $vector->toArray() + : $vector, + flags: JSON_THROW_ON_ERROR + ), + $this->unions ? 'unionOrder' : 'order' + ); + + $this->{$this->unions ? 'unionOrders' : 'orders'}[] = [ + 'column' => new Expression("({$this->getGrammar()->wrap($column)} <=> ?)"), + 'direction' => 'asc', + ]; + + return $this; + } + /** * Put the query's results in random order. * @@ -4403,6 +4539,20 @@ public function getConnection() return $this->connection; } + /** + * Ensure the database connection supports vector queries. + * + * @return void + * + * @throws \RuntimeException + */ + protected function ensureConnectionSupportsVectors() + { + if (! $this->connection instanceof PostgresConnection) { + throw new RuntimeException('Vector distance queries are only supported by Postgres.'); + } + } + /** * Get the database query processor instance.
* diff --git a/src/Illuminate/Database/Schema/Blueprint.php b/src/Illuminate/Database/Schema/Blueprint.php index dfef4b1a5bd4..1643b86872b1 100755 --- a/src/Illuminate/Database/Schema/Blueprint.php +++ b/src/Illuminate/Database/Schema/Blueprint.php @@ -215,7 +215,7 @@ protected function addImpliedCommands() protected function addFluentIndexes() { foreach ($this->columns as $column) { - foreach (['primary', 'unique', 'index', 'fulltext', 'fullText', 'spatialIndex'] as $index) { + foreach (['primary', 'unique', 'index', 'fulltext', 'fullText', 'spatialIndex', 'vectorIndex'] as $index) { // If the column is supposed to be changed to an auto increment column and // the specified index is primary, there is no need to add a command on // MySQL, as it will be handled during the column definition instead. @@ -227,7 +227,11 @@ protected function addFluentIndexes() // to "true" (boolean), no name has been specified for this index so the // index method can be called without a name and it will generate one. if ($column->{$index} === true) { - $this->{$index}($column->name); + $indexMethod = $index === 'index' && $column->type === 'vector' + ? 'vectorIndex' + : $index; + + $this->{$indexMethod}($column->name); $column->{$index} = null; continue 2; @@ -247,7 +251,11 @@ protected function addFluentIndexes() // value, we'll go ahead and call the index method and pass the name for // the index since the developer specified the explicit name for this. elseif (isset($column->{$index})) { - $this->{$index}($column->name, $column->{$index}); + $indexMethod = $index === 'index' && $column->type === 'vector' + ? 'vectorIndex' + : $index; + + $this->{$indexMethod}($column->name, $column->{$index}); $column->{$index} = null; continue 2; @@ -694,6 +702,18 @@ public function spatialIndex($columns, $name = null, $operatorClass = null) return $this->indexCommand('spatialIndex', $columns, $name, null, $operatorClass); } + /** + * Specify a vector index for the table (built with "hnsw" and the "vector_cosine_ops" operator class).
+ * + * @param string $column The column to index. + * @param string|null $name The index name, or null to have one generated. + * @return \Illuminate\Database\Schema\IndexDefinition + */ + public function vectorIndex($column, $name = null) + { + return $this->indexCommand('vectorIndex', $column, $name, 'hnsw', 'vector_cosine_ops'); + } + /** * Specify a raw index for the table. * diff --git a/src/Illuminate/Database/Schema/Builder.php b/src/Illuminate/Database/Schema/Builder.php index 50d2ff70648a..4fa753420ee6 100755 --- a/src/Illuminate/Database/Schema/Builder.php +++ b/src/Illuminate/Database/Schema/Builder.php @@ -5,9 +5,11 @@ use Closure; use Illuminate\Container\Container; use Illuminate\Database\Connection; +use Illuminate\Database\PostgresConnection; use Illuminate\Support\Traits\Macroable; use InvalidArgumentException; use LogicException; +use RuntimeException; class Builder { @@ -639,6 +641,38 @@ public function withoutForeignKeyConstraints(Closure $callback) } } + /** + * Create the "vector" extension on the schema if it does not exist. + * + * @param string|null $schema The schema to create the extension in, when given. + * @return void + */ + public function ensureVectorExtensionExists($schema = null) + { + $this->ensureExtensionExists('vector', $schema); + } + + /** + * Create a new extension on the schema if it does not exist; throws a RuntimeException on non-Postgres connections. + * + * @param string $name The extension name; it is wrapped by the schema grammar before use. + * @param string|null $schema The schema to create the extension in, when filled. + * @return void + */ + public function ensureExtensionExists($name, $schema = null) + { + if (! $this->getConnection() instanceof PostgresConnection) { + throw new RuntimeException('Extensions are only supported by Postgres.'); + } + + $name = $this->getConnection()->getSchemaGrammar()->wrap($name); + + $this->getConnection()->statement(match (filled($schema)) { + true => "create extension if not exists {$name} schema {$this->getConnection()->getSchemaGrammar()->wrap($schema)}", + false => "create extension if not exists {$name}", + }); + } + /** * Execute the blueprint to build / modify the table.
* diff --git a/src/Illuminate/Database/Schema/ColumnDefinition.php b/src/Illuminate/Database/Schema/ColumnDefinition.php index 5f9df78bc231..8f70e4fdb1c2 100644 --- a/src/Illuminate/Database/Schema/ColumnDefinition.php +++ b/src/Illuminate/Database/Schema/ColumnDefinition.php @@ -26,6 +26,7 @@ * @method $this persisted() Mark the computed generated column as persistent (SQL Server) * @method $this primary(bool $value = true) Add a primary index * @method $this spatialIndex(bool|string $indexName = null) Add a spatial index + * @method $this vectorIndex(bool|string $indexName = null) Add a vector index (PostgreSQL) * @method $this startingValue(int $startingValue) Set the starting value of an auto-incrementing field (MySQL/PostgreSQL) * @method $this storedAs(string|\Illuminate\Contracts\Database\Query\Expression $expression) Create a stored generated column (MySQL/PostgreSQL/SQLite) * @method $this type(string $type) Specify a type for the column diff --git a/src/Illuminate/Database/Schema/Grammars/Grammar.php b/src/Illuminate/Database/Schema/Grammars/Grammar.php index 391324b9c6d2..992d6fb1c730 100755 --- a/src/Illuminate/Database/Schema/Grammars/Grammar.php +++ b/src/Illuminate/Database/Schema/Grammars/Grammar.php @@ -152,6 +152,20 @@ public function compileIndexes($schema, $table) throw new RuntimeException('This database driver does not support retrieving indexes.'); } + /** + * Compile a vector index key command; this base implementation always throws, so supporting grammars must override it. + * + * @param \Illuminate\Database\Schema\Blueprint $blueprint + * @param \Illuminate\Support\Fluent $command + * @return void + * + * @throws \RuntimeException + */ + public function compileVectorIndex(Blueprint $blueprint, Fluent $command) + { + throw new RuntimeException('The database driver in use does not support vector indexes.'); + } + /** * Compile the query to determine the foreign keys.
* diff --git a/src/Illuminate/Database/Schema/Grammars/PostgresGrammar.php b/src/Illuminate/Database/Schema/Grammars/PostgresGrammar.php index 73e78071a33e..36a22cc035f8 100755 --- a/src/Illuminate/Database/Schema/Grammars/PostgresGrammar.php +++ b/src/Illuminate/Database/Schema/Grammars/PostgresGrammar.php @@ -429,6 +429,18 @@ public function compileSpatialIndex(Blueprint $blueprint, Fluent $command) return $this->compileIndex($blueprint, $command); } + /** + * Compile a vector index key command by delegating to the operator-class index compiler. + * + * @param \Illuminate\Database\Schema\Blueprint $blueprint + * @param \Illuminate\Support\Fluent $command + * @return string + */ + public function compileVectorIndex(Blueprint $blueprint, Fluent $command) + { + return $this->compileIndexWithOperatorClass($blueprint, $command); + } + /** * Compile a spatial index with operator class key command. * diff --git a/tests/Database/DatabasePostgresSchemaGrammarTest.php b/tests/Database/DatabasePostgresSchemaGrammarTest.php index 6d95c9759e00..4c076d81cbd2 100755 --- a/tests/Database/DatabasePostgresSchemaGrammarTest.php +++ b/tests/Database/DatabasePostgresSchemaGrammarTest.php @@ -453,6 +453,56 @@ public function testAddingSpatialIndexWithOperatorClassOnline() $this->assertSame('create index concurrently "my_index" on "geo" using gist ("coordinates" point_ops)', $statements[0]); } + public function testAddingVectorIndex() + { + $blueprint = new Blueprint($this->getConnection(), 'posts'); + $blueprint->vectorIndex('embeddings'); + $statements = $blueprint->toSql(); + + $this->assertCount(1, $statements); + $this->assertSame('create index "posts_embeddings_vectorindex" on "posts" using hnsw ("embeddings" vector_cosine_ops)', $statements[0]); + } +
+ public function testAddingVectorIndexOnline() + { + $blueprint = new Blueprint($this->getConnection(), 'posts'); + $blueprint->vectorIndex('embeddings')->online(); + $statements = $blueprint->toSql(); + + $this->assertCount(1, $statements); + $this->assertSame('create index concurrently "posts_embeddings_vectorindex" on "posts" using hnsw ("embeddings" vector_cosine_ops)', $statements[0]); + } + + public function testAddingVectorIndexWithName() + { + $blueprint = new Blueprint($this->getConnection(), 'posts'); + $blueprint->vectorIndex('embeddings', 'my_vector_index'); + $statements = $blueprint->toSql(); + + $this->assertCount(1, $statements); + $this->assertSame('create index "my_vector_index" on "posts" using hnsw ("embeddings" vector_cosine_ops)', $statements[0]); + } + + public function testAddingFluentVectorIndex() + { + $blueprint = new Blueprint($this->getConnection(), 'posts'); + $blueprint->vector('embeddings', 1536)->vectorIndex(); + $statements = $blueprint->toSql(); + + $this->assertCount(2, $statements); + $this->assertSame('create index "posts_embeddings_vectorindex" on "posts" using hnsw ("embeddings" vector_cosine_ops)', $statements[1]); + } + + public function testAddingFluentIndexOnVectorColumn() + { + $blueprint = new Blueprint($this->getConnection(), 'posts'); + $blueprint->vector('embeddings', 1536)->index(); + $statements = $blueprint->toSql(); + + $this->assertCount(2, $statements); + $this->assertSame('create index "posts_embeddings_vectorindex" on "posts" using hnsw ("embeddings" vector_cosine_ops)', $statements[1]); + } + public function testAddingRawIndex() { $blueprint = new Blueprint($this->getConnection(), 'users');