-
Notifications
You must be signed in to change notification settings - Fork 5.5k
feat: Add syntax support for CREATE VECTOR INDEX #27027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -168,7 +168,11 @@ statement | |
| SET updateAssignment (',' updateAssignment)* | ||
| (WHERE where=booleanExpression)? #update | ||
| | MERGE INTO qualifiedName (AS? identifier)? | ||
| USING relation ON expression mergeCase+ #mergeInto | ||
| USING relation ON expression mergeCase+ #mergeInto | ||
| | CREATE VECTOR INDEX identifier ON TABLE qualifiedName | ||
| '(' identifier (',' identifier)? ')' | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The grammar currently restricts the column list to at most two identifiers (identifier (',' identifier)?). Is the intent to strictly enforce the (row_id, embedding_column) pattern? If so this makes sense for the current design, but it may limit future extensions where multiple embedding columns or additional metadata columns might need to be indexed. |
||
| (WITH properties)? | ||
| (UPDATING FOR booleanExpression)? #createVectorIndex | ||
| ; | ||
|
|
||
| query | ||
|
|
@@ -194,7 +198,7 @@ likeClause | |
| ; | ||
|
|
||
| properties | ||
| : '(' property (',' property)* ')' | ||
| : '(' property (',' property)* ','? ')' | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need it? |
||
| ; | ||
|
|
||
| property | ||
|
|
@@ -704,7 +708,7 @@ nonReserved | |
| | FETCH | FILTER | FIRST | FOLLOWING | FORMAT | FUNCTION | FUNCTIONS | ||
| | GRANT | GRANTED | GRANTS | GRAPHVIZ | GROUPS | ||
| | HOUR | ||
| | IF | IGNORE | INCLUDING | INPUT | INTERVAL | INVOKER | IO | ISOLATION | ||
| | IF | IGNORE | INCLUDING | INDEX | INPUT | INTERVAL | INVOKER | IO | ISOLATION | ||
| | JSON | ||
| | KEEP | KEY | ||
| | LANGUAGE | LAST | LATERAL | LEVEL | LIMIT | LOGICAL | ||
|
|
@@ -716,8 +720,8 @@ nonReserved | |
| | SCHEMA | SCHEMAS | SECOND | SECURITY | SERIALIZABLE | SESSION | SET | SETS | SNAPSHOT | SNAPSHOTS | SQL | ||
| | SHOW | SOME | START | STATS | SUBSTRING | SYSTEM | SYSTEM_TIME | SYSTEM_VERSION | ||
| | TABLES | TABLESAMPLE | TAG | TEMPORARY | TEXT | TIME | TIMESTAMP | TO | TRANSACTION | TRUNCATE | TRY_CAST | TYPE | ||
| | UNBOUNDED | UNCOMMITTED | UNIQUE | UPDATE | USE | USER | ||
| | VALIDATE | VERBOSE | VERSION | VIEW | ||
| | UNBOUNDED | UNCOMMITTED | UNIQUE | UPDATE | UPDATING | USE | USER | ||
| | VALIDATE | VECTOR | VERBOSE | VERSION | VIEW | ||
| | WORK | WRITE | ||
| | YEAR | ||
| | ZONE | ||
|
|
@@ -813,6 +817,7 @@ IF: 'IF'; | |
| IGNORE: 'IGNORE'; | ||
| IN: 'IN'; | ||
| INCLUDING: 'INCLUDING'; | ||
| INDEX: 'INDEX'; | ||
| INNER: 'INNER'; | ||
| INPUT: 'INPUT'; | ||
| INSERT: 'INSERT'; | ||
|
|
@@ -942,10 +947,12 @@ UNIQUE: 'UNIQUE'; | |
| UNNEST: 'UNNEST'; | ||
| UPDATE: 'UPDATE'; | ||
| USE: 'USE'; | ||
| UPDATING: 'UPDATING'; | ||
| USER: 'USER'; | ||
| USING: 'USING'; | ||
| VALIDATE: 'VALIDATE'; | ||
| VALUES: 'VALUES'; | ||
| VECTOR: 'VECTOR'; | ||
| VERBOSE: 'VERBOSE'; | ||
| VERSION: 'VERSION'; | ||
| VIEW: 'VIEW'; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,7 @@ | |
| import com.facebook.presto.sql.tree.CreateTable; | ||
| import com.facebook.presto.sql.tree.CreateTableAsSelect; | ||
| import com.facebook.presto.sql.tree.CreateTag; | ||
| import com.facebook.presto.sql.tree.CreateVectorIndex; | ||
| import com.facebook.presto.sql.tree.CreateView; | ||
| import com.facebook.presto.sql.tree.Deallocate; | ||
| import com.facebook.presto.sql.tree.Delete; | ||
|
|
@@ -1285,6 +1286,36 @@ protected Void visitCreateTable(CreateTable node, Integer indent) | |
| return null; | ||
| } | ||
|
|
||
| @Override | ||
| protected Void visitCreateVectorIndex(CreateVectorIndex node, Integer indent) | ||
| { | ||
| builder.append("CREATE VECTOR INDEX "); | ||
| builder.append(formatName(node.getIndexName())); | ||
| builder.append(" ON TABLE "); | ||
| builder.append(formatName(node.getTableName())); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The formatter currently omits the optional TABLE keyword even when specified in the original statement. It may be preferable to preserve the original syntax to avoid confusion when round-tripping statements via EXPLAIN or query logging.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! Added TABLE keyword |
||
| builder.append(" ("); | ||
| builder.append(node.getColumns().stream() | ||
| .map(Formatter::formatName) | ||
| .collect(joining(", "))); | ||
| builder.append(")"); | ||
|
|
||
| if (!node.getProperties().isEmpty()) { | ||
| builder.append("\nWITH ("); | ||
| builder.append(node.getProperties().stream() | ||
| .map(property -> formatName(property.getName()) + " = " + | ||
| formatExpression(property.getValue(), parameters)) | ||
| .collect(joining(", "))); | ||
| builder.append(")"); | ||
| } | ||
|
|
||
| node.getUpdatingFor().ifPresent(updatingFor -> { | ||
| builder.append("\nUPDATING FOR "); | ||
| builder.append(formatExpression(updatingFor, parameters)); | ||
| }); | ||
|
|
||
| return null; | ||
| } | ||
|
|
||
| private String formatPropertiesMultiLine(List<Property> properties) | ||
| { | ||
| if (properties.isEmpty()) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,147 @@ | ||
| /* | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package com.facebook.presto.sql.tree; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
|
|
||
| import static com.google.common.base.MoreObjects.toStringHelper; | ||
| import static java.util.Objects.requireNonNull; | ||
|
|
||
| public class CreateVectorIndex | ||
| extends Statement | ||
| { | ||
| private final Identifier indexName; | ||
|
Comment on lines
+27
to
+28
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. indexName should also be QualifiedName? |
||
| private final QualifiedName tableName; | ||
| private final List<Identifier> columns; | ||
| private final Optional<Expression> updatingFor; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this predicate may influence incremental index maintenance behavior, it may be useful to clarify whether this expression is expected to be interpreted by the engine or delegated to the connector during index update planning. This would help ensure alignment with the RFC’s expectation of connector-managed lifecycle and refresh semantics.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It will be interpreted by the engine to a where predicate. |
||
| private final List<Property> properties; | ||
|
|
||
| public CreateVectorIndex( | ||
| Identifier indexName, | ||
| QualifiedName tableName, | ||
| List<Identifier> columns, | ||
| Optional<Expression> updatingFor, | ||
| List<Property> properties) | ||
| { | ||
| this(Optional.empty(), indexName, tableName, columns, updatingFor, properties); | ||
| } | ||
|
|
||
| public CreateVectorIndex( | ||
| NodeLocation location, | ||
| Identifier indexName, | ||
| QualifiedName tableName, | ||
| List<Identifier> columns, | ||
| Optional<Expression> updatingFor, | ||
| List<Property> properties) | ||
| { | ||
| this(Optional.of(location), indexName, tableName, columns, updatingFor, properties); | ||
| } | ||
|
|
||
| private CreateVectorIndex( | ||
| Optional<NodeLocation> location, | ||
| Identifier indexName, | ||
| QualifiedName tableName, | ||
| List<Identifier> columns, | ||
| Optional<Expression> updatingFor, | ||
| List<Property> properties) | ||
| { | ||
| super(location); | ||
| this.indexName = requireNonNull(indexName, "indexName is null"); | ||
| this.tableName = requireNonNull(tableName, "tableName is null"); | ||
| this.columns = ImmutableList.copyOf(requireNonNull(columns, "columns is null")); | ||
| this.updatingFor = requireNonNull(updatingFor, "updatingFor is null"); | ||
| this.properties = ImmutableList.copyOf(requireNonNull(properties, "properties is null")); | ||
| } | ||
|
|
||
| public Identifier getIndexName() | ||
| { | ||
| return indexName; | ||
| } | ||
|
|
||
| public QualifiedName getTableName() | ||
| { | ||
| return tableName; | ||
| } | ||
|
|
||
| public List<Identifier> getColumns() | ||
| { | ||
| return columns; | ||
| } | ||
|
|
||
| public Optional<Expression> getUpdatingFor() | ||
| { | ||
| return updatingFor; | ||
| } | ||
|
|
||
| public List<Property> getProperties() | ||
| { | ||
| return properties; | ||
| } | ||
|
|
||
| @Override | ||
| public <R, C> R accept(AstVisitor<R, C> visitor, C context) | ||
| { | ||
| return visitor.visitCreateVectorIndex(this, context); | ||
| } | ||
|
|
||
| @Override | ||
| public List<Node> getChildren() | ||
| { | ||
| ImmutableList.Builder<Node> children = ImmutableList.builder(); | ||
| children.add(indexName); | ||
| children.addAll(columns); | ||
| updatingFor.ifPresent(children::add); | ||
| children.addAll(properties); | ||
| return children.build(); | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() | ||
| { | ||
| return Objects.hash(indexName, tableName, columns, updatingFor, properties); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean equals(Object obj) | ||
| { | ||
| if (this == obj) { | ||
| return true; | ||
| } | ||
| if ((obj == null) || (getClass() != obj.getClass())) { | ||
| return false; | ||
| } | ||
| CreateVectorIndex o = (CreateVectorIndex) obj; | ||
| return Objects.equals(indexName, o.indexName) && | ||
| Objects.equals(tableName, o.tableName) && | ||
| Objects.equals(columns, o.columns) && | ||
| Objects.equals(updatingFor, o.updatingFor) && | ||
| Objects.equals(properties, o.properties); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() | ||
| { | ||
| return toStringHelper(this) | ||
| .add("indexName", indexName) | ||
| .add("tableName", tableName) | ||
| .add("columns", columns) | ||
| .add("updatingFor", updatingFor) | ||
| .add("properties", properties) | ||
| .toString(); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should it be
instead of
identifier. Like create view or create table, as index is internally stored as a table.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @gggrace14 , In our implementation the index is not stored as a catalog table but as a connector-managed index file (Stored in s3 based object storage). Because of this, the index name behaves more like a logical identifier rather than a fully qualified relation. Using identifier seemed appropriate in this case, whereas qualifiedName would imply the index itself is a catalog object similar to CREATE TABLE or CREATE VIEW.