Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ statement
SET updateAssignment (',' updateAssignment)*
(WHERE where=booleanExpression)? #update
| MERGE INTO qualifiedName (AS? identifier)?
USING relation ON expression mergeCase+ #mergeInto
USING relation ON expression mergeCase+ #mergeInto
| CREATE VECTOR INDEX identifier ON TABLE qualifiedName
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be

CREATE VECTOR INDEX qualifiedName

instead of identifier. Like create view or create table, as index is internally stored as a table.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @gggrace14 , In our implementation the index is not stored as a catalog table but as a connector-managed index file (Stored in s3 based object storage). Because of this, the index name behaves more like a logical identifier rather than a fully qualified relation. Using identifier seemed appropriate in this case, whereas qualifiedName would imply the index itself is a catalog object similar to CREATE TABLE or CREATE VIEW.

'(' identifier (',' identifier)? ')'
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The grammar currently restricts the column list to at most two identifiers (identifier (',' identifier)?). Is the intent to strictly enforce the (row_id, embedding_column) pattern? If so this makes sense for the current design, but it may limit future extensions where multiple embedding columns or additional metadata columns might need to be indexed.

(WITH properties)?
(UPDATING FOR booleanExpression)? #createVectorIndex
;

query
Expand All @@ -194,7 +198,7 @@ likeClause
;

properties
: '(' property (',' property)* ')'
: '(' property (',' property)* ','? ')'
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need it?

;

property
Expand Down Expand Up @@ -704,7 +708,7 @@ nonReserved
| FETCH | FILTER | FIRST | FOLLOWING | FORMAT | FUNCTION | FUNCTIONS
| GRANT | GRANTED | GRANTS | GRAPHVIZ | GROUPS
| HOUR
| IF | IGNORE | INCLUDING | INPUT | INTERVAL | INVOKER | IO | ISOLATION
| IF | IGNORE | INCLUDING | INDEX | INPUT | INTERVAL | INVOKER | IO | ISOLATION
| JSON
| KEEP | KEY
| LANGUAGE | LAST | LATERAL | LEVEL | LIMIT | LOGICAL
Expand All @@ -716,8 +720,8 @@ nonReserved
| SCHEMA | SCHEMAS | SECOND | SECURITY | SERIALIZABLE | SESSION | SET | SETS | SNAPSHOT | SNAPSHOTS | SQL
| SHOW | SOME | START | STATS | SUBSTRING | SYSTEM | SYSTEM_TIME | SYSTEM_VERSION
| TABLES | TABLESAMPLE | TAG | TEMPORARY | TEXT | TIME | TIMESTAMP | TO | TRANSACTION | TRUNCATE | TRY_CAST | TYPE
| UNBOUNDED | UNCOMMITTED | UNIQUE | UPDATE | USE | USER
| VALIDATE | VERBOSE | VERSION | VIEW
| UNBOUNDED | UNCOMMITTED | UNIQUE | UPDATE | UPDATING | USE | USER
| VALIDATE | VECTOR | VERBOSE | VERSION | VIEW
| WORK | WRITE
| YEAR
| ZONE
Expand Down Expand Up @@ -813,6 +817,7 @@ IF: 'IF';
IGNORE: 'IGNORE';
IN: 'IN';
INCLUDING: 'INCLUDING';
INDEX: 'INDEX';
INNER: 'INNER';
INPUT: 'INPUT';
INSERT: 'INSERT';
Expand Down Expand Up @@ -942,10 +947,12 @@ UNIQUE: 'UNIQUE';
UNNEST: 'UNNEST';
UPDATE: 'UPDATE';
USE: 'USE';
UPDATING: 'UPDATING';
USER: 'USER';
USING: 'USING';
VALIDATE: 'VALIDATE';
VALUES: 'VALUES';
VECTOR: 'VECTOR';
VERBOSE: 'VERBOSE';
VERSION: 'VERSION';
VIEW: 'VIEW';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.facebook.presto.sql.tree.CreateTable;
import com.facebook.presto.sql.tree.CreateTableAsSelect;
import com.facebook.presto.sql.tree.CreateTag;
import com.facebook.presto.sql.tree.CreateVectorIndex;
import com.facebook.presto.sql.tree.CreateView;
import com.facebook.presto.sql.tree.Deallocate;
import com.facebook.presto.sql.tree.Delete;
Expand Down Expand Up @@ -1285,6 +1286,36 @@ protected Void visitCreateTable(CreateTable node, Integer indent)
return null;
}

@Override
protected Void visitCreateVectorIndex(CreateVectorIndex node, Integer indent)
{
builder.append("CREATE VECTOR INDEX ");
builder.append(formatName(node.getIndexName()));
builder.append(" ON TABLE ");
builder.append(formatName(node.getTableName()));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The formatter currently omits the optional TABLE keyword even when specified in the original statement. It may be preferable to preserve the original syntax to avoid confusion when round-tripping statements via EXPLAIN or query logging.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! Added TABLE keyword

builder.append(" (");
builder.append(node.getColumns().stream()
.map(Formatter::formatName)
.collect(joining(", ")));
builder.append(")");

if (!node.getProperties().isEmpty()) {
builder.append("\nWITH (");
builder.append(node.getProperties().stream()
.map(property -> formatName(property.getName()) + " = " +
formatExpression(property.getValue(), parameters))
.collect(joining(", ")));
builder.append(")");
}

node.getUpdatingFor().ifPresent(updatingFor -> {
builder.append("\nUPDATING FOR ");
builder.append(formatExpression(updatingFor, parameters));
});

return null;
}

private String formatPropertiesMultiLine(List<Property> properties)
{
if (properties.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import com.facebook.presto.sql.tree.CreateTableAsSelect;
import com.facebook.presto.sql.tree.CreateTag;
import com.facebook.presto.sql.tree.CreateType;
import com.facebook.presto.sql.tree.CreateVectorIndex;
import com.facebook.presto.sql.tree.CreateView;
import com.facebook.presto.sql.tree.Cube;
import com.facebook.presto.sql.tree.CurrentTime;
Expand Down Expand Up @@ -368,6 +369,37 @@ public Node visitCreateTable(SqlBaseParser.CreateTableContext context)
comment);
}

@Override
public Node visitCreateVectorIndex(SqlBaseParser.CreateVectorIndexContext context)
{
Identifier indexName = (Identifier) visit(context.identifier(0));
QualifiedName tableName = getQualifiedName(context.qualifiedName());

// Columns start from identifier(1) onwards
List<Identifier> columns = context.identifier().stream()
.skip(1) // Skip index name
.map(id -> (Identifier) visit(id))
.collect(toImmutableList());

Optional<Expression> updatingFor = Optional.empty();
if (context.UPDATING() != null) {
updatingFor = Optional.of((Expression) visit(context.booleanExpression()));
}

List<Property> properties = ImmutableList.of();
if (context.properties() != null) {
properties = visit(context.properties().property(), Property.class);
}

return new CreateVectorIndex(
getLocation(context),
indexName,
tableName,
columns,
updatingFor,
properties);
}

@Override
public Node visitCreateType(SqlBaseParser.CreateTypeContext context)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,11 @@ protected R visitCreateTable(CreateTable node, C context)
return visitStatement(node, context);
}

protected R visitCreateVectorIndex(CreateVectorIndex node, C context)
{
return visitStatement(node, context);
}

protected R visitCreateType(CreateType node, C context)
{
return visitStatement(node, context);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.sql.tree;

import com.google.common.collect.ImmutableList;

import java.util.List;
import java.util.Objects;
import java.util.Optional;

import static com.google.common.base.MoreObjects.toStringHelper;
import static java.util.Objects.requireNonNull;

public class CreateVectorIndex
extends Statement
{
private final Identifier indexName;
Comment on lines +27 to +28
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indexName should also be QualifiedName?

private final QualifiedName tableName;
private final List<Identifier> columns;
private final Optional<Expression> updatingFor;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this predicate may influence incremental index maintenance behavior, it may be useful to clarify whether this expression is expected to be interpreted by the engine or delegated to the connector during index update planning. This would help ensure alignment with the RFC’s expectation of connector-managed lifecycle and refresh semantics.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be interpreted by the engine to a where predicate.

private final List<Property> properties;

public CreateVectorIndex(
Identifier indexName,
QualifiedName tableName,
List<Identifier> columns,
Optional<Expression> updatingFor,
List<Property> properties)
{
this(Optional.empty(), indexName, tableName, columns, updatingFor, properties);
}

public CreateVectorIndex(
NodeLocation location,
Identifier indexName,
QualifiedName tableName,
List<Identifier> columns,
Optional<Expression> updatingFor,
List<Property> properties)
{
this(Optional.of(location), indexName, tableName, columns, updatingFor, properties);
}

private CreateVectorIndex(
Optional<NodeLocation> location,
Identifier indexName,
QualifiedName tableName,
List<Identifier> columns,
Optional<Expression> updatingFor,
List<Property> properties)
{
super(location);
this.indexName = requireNonNull(indexName, "indexName is null");
this.tableName = requireNonNull(tableName, "tableName is null");
this.columns = ImmutableList.copyOf(requireNonNull(columns, "columns is null"));
this.updatingFor = requireNonNull(updatingFor, "updatingFor is null");
this.properties = ImmutableList.copyOf(requireNonNull(properties, "properties is null"));
}

public Identifier getIndexName()
{
return indexName;
}

public QualifiedName getTableName()
{
return tableName;
}

public List<Identifier> getColumns()
{
return columns;
}

public Optional<Expression> getUpdatingFor()
{
return updatingFor;
}

public List<Property> getProperties()
{
return properties;
}

@Override
public <R, C> R accept(AstVisitor<R, C> visitor, C context)
{
return visitor.visitCreateVectorIndex(this, context);
}

@Override
public List<Node> getChildren()
{
ImmutableList.Builder<Node> children = ImmutableList.builder();
children.add(indexName);
children.addAll(columns);
updatingFor.ifPresent(children::add);
children.addAll(properties);
return children.build();
}

@Override
public int hashCode()
{
return Objects.hash(indexName, tableName, columns, updatingFor, properties);
}

@Override
public boolean equals(Object obj)
{
if (this == obj) {
return true;
}
if ((obj == null) || (getClass() != obj.getClass())) {
return false;
}
CreateVectorIndex o = (CreateVectorIndex) obj;
return Objects.equals(indexName, o.indexName) &&
Objects.equals(tableName, o.tableName) &&
Objects.equals(columns, o.columns) &&
Objects.equals(updatingFor, o.updatingFor) &&
Objects.equals(properties, o.properties);
}

@Override
public String toString()
{
return toStringHelper(this)
.add("indexName", indexName)
.add("tableName", tableName)
.add("columns", columns)
.add("updatingFor", updatingFor)
.add("properties", properties)
.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,20 @@ protected R visitCreateTable(CreateTable node, C context)
return null;
}

@Override
protected R visitCreateVectorIndex(CreateVectorIndex node, C context)
{
process(node.getIndexName(), context);
for (Identifier column : node.getColumns()) {
process(column, context);
}
node.getUpdatingFor().ifPresent(updatingFor -> process(updatingFor, context));
for (Property property : node.getProperties()) {
process(property, context);
}
return null;
}

@Override
protected R visitStartTransaction(StartTransaction node, C context)
{
Expand Down
Loading
Loading