-
Notifications
You must be signed in to change notification settings - Fork 5.5k
feat: Add analysis support for CREATE VECTOR INDEX (#27036) #27036
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -110,6 +110,7 @@ | |
| import com.facebook.presto.sql.tree.CreateSchema; | ||
| import com.facebook.presto.sql.tree.CreateTable; | ||
| import com.facebook.presto.sql.tree.CreateTableAsSelect; | ||
| import com.facebook.presto.sql.tree.CreateVectorIndex; | ||
| import com.facebook.presto.sql.tree.CreateView; | ||
| import com.facebook.presto.sql.tree.Cube; | ||
| import com.facebook.presto.sql.tree.Deallocate; | ||
|
|
@@ -1144,6 +1145,71 @@ protected Scope visitCreateTable(CreateTable node, Optional<Scope> scope) | |
| return createAndAssignScope(node, scope); | ||
| } | ||
|
|
||
| @Override | ||
| protected Scope visitCreateVectorIndex(CreateVectorIndex node, Optional<Scope> scope) | ||
| { | ||
| QualifiedObjectName sourceTableName = createQualifiedObjectName(session, node, node.getTableName(), metadata); | ||
| if (!metadataResolver.tableExists(sourceTableName)) { | ||
| throw new SemanticException(MISSING_TABLE, node, "Source table '%s' does not exist", sourceTableName); | ||
| } | ||
|
|
||
| QualifiedObjectName targetTable = createQualifiedObjectName(session, node, node.getIndexName(), metadata); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi @skyelves, here you're creating targetTable of type QualifiedObjectName from node.getIndexName(), which is of type QualifiedName. Are you following an existing example? Is it okay to keep using QualifiedName at the Analyzer layer? Can you help check? A quick look at the Analysis class shows several QualifiedName usages there. |
||
| if (metadataResolver.tableExists(targetTable)) { | ||
| throw new SemanticException(TABLE_ALREADY_EXISTS, node, "Destination table '%s' already exists", targetTable); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggested message: Index table '%s' already exists |
||
| } | ||
|
|
||
| // Analyze the source table to build a proper scope with typed columns | ||
| // Use AllowAllAccessControl since we check permissions separately below | ||
| StatementAnalyzer analyzer = new StatementAnalyzer( | ||
| analysis, | ||
| metadata, | ||
| sqlParser, | ||
| new AllowAllAccessControl(), | ||
| session, | ||
| warningCollector); | ||
|
|
||
| Table sourceTable = new Table(node.getTableName()); | ||
| Scope tableScope = analyzer.analyze(sourceTable, scope); | ||
|
|
||
| // Validate that specified columns exist in the source table | ||
| TableHandle sourceTableHandle = metadataResolver.getTableHandle(sourceTableName).get(); | ||
| Map<String, ColumnHandle> sourceColumns = metadataResolver.getColumnHandles(sourceTableHandle); | ||
| for (Identifier column : node.getColumns()) { | ||
| if (!sourceColumns.containsKey(column.getValue())) { | ||
| throw new SemanticException(MISSING_COLUMN, column, "Column '%s' does not exist in source table '%s'", column.getValue(), sourceTableName); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The current validation ensures that the specified columns exist in the source table, which is good. However, since the syntax allows either (embedding) or (row_id, embedding), it would be helpful to also validate the column structure. If only one column is provided, it should be validated as an embedding column rather than a row identifier. Additionally, when two columns are specified, they should follow the (row_id, embedding) order (optional). This validation can help prevent invalid cases such as (id) from passing analysis and then failing later during index creation. |
||
| } | ||
| } | ||
|
|
||
| // Analyze UPDATING FOR predicate (validates column references, types, etc.) | ||
| node.getUpdatingFor().ifPresent(where -> analyzeWhere(node, tableScope, where)); | ||
|
|
||
| validateProperties(node.getProperties(), scope); | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The grammar and AST allow specifying a WHERE clause for index creation via node.getWhere(); however, this clause does not appear to be referenced during analysis or synthetic query generation. This may lead to unexpected user behavior, as any filtering semantics implied by the WHERE clause are currently ignored during index construction. Should this clause be incorporated into the index build logic, or alternatively disallowed until filtering semantics are supported?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The comments should be addressed because we no longer do serialization at the analyzer layer. |
||
| Map<String, Expression> allProperties = mapFromProperties(node.getProperties()); | ||
|
|
||
| // user must have read permission on the source table to create a vector index | ||
| Multimap<QualifiedObjectName, Subfield> tableColumnMap = ImmutableMultimap.<QualifiedObjectName, Subfield>builder() | ||
| .putAll(sourceTableName, sourceColumns.keySet().stream() | ||
| .map(column -> new Subfield(column, ImmutableList.of())) | ||
| .collect(toImmutableSet())) | ||
| .build(); | ||
| analysis.addTableColumnAndSubfieldReferences(accessControl, session.getIdentity(), | ||
| session.getTransactionId(), session.getAccessControlContext(), tableColumnMap, tableColumnMap); | ||
|
|
||
| analysis.addAccessControlCheckForTable(TABLE_CREATE, | ||
| new AccessControlInfoForTable(accessControl, session.getIdentity(), | ||
| session.getTransactionId(), session.getAccessControlContext(), targetTable)); | ||
|
|
||
| analysis.setCreateVectorIndexAnalysis(new Analysis.CreateVectorIndexAnalysis( | ||
| sourceTableName, | ||
| targetTable, | ||
| node.getColumns(), | ||
| allProperties, | ||
| node.getUpdatingFor())); | ||
|
|
||
| return createAndAssignScope(node, scope, Field.newUnqualified(node.getLocation(), "result", VARCHAR)); | ||
| } | ||
|
|
||
| @Override | ||
| protected Scope visitProperty(Property node, Optional<Scope> scope) | ||
| { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The index artifact is currently represented using QualifiedObjectName, similar to a table. Since vector indexes may also be implemented as connector-managed artifacts (e.g., external index files or metadata entries), it would be better to treat this as a logical index identifier rather than strictly a physical table. This would allow connectors to map the index name to their own storage model while keeping the engine abstraction consistent across different implementations.