-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Support CREATE TABLE AS SELECT and INSERT in BigQuery
#13094
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fb5e4d5
cdf273a
426ecb9
6633525
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| /* | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package io.trino.plugin.bigquery; | ||
|
|
||
| import com.fasterxml.jackson.annotation.JsonCreator; | ||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import com.google.common.collect.ImmutableList; | ||
| import io.trino.spi.connector.ConnectorInsertTableHandle; | ||
| import io.trino.spi.type.Type; | ||
|
|
||
| import java.util.List; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkArgument; | ||
| import static java.util.Objects.requireNonNull; | ||
|
|
||
| public class BigQueryInsertTableHandle | ||
| implements ConnectorInsertTableHandle | ||
| { | ||
| private final RemoteTableName remoteTableName; | ||
| private final List<String> columnNames; | ||
| private final List<Type> columnTypes; | ||
|
|
||
| @JsonCreator | ||
| public BigQueryInsertTableHandle( | ||
| @JsonProperty("remoteTableName") RemoteTableName remoteTableName, | ||
| @JsonProperty("columnNames") List<String> columnNames, | ||
| @JsonProperty("columnTypes") List<Type> columnTypes) | ||
| { | ||
| this.remoteTableName = requireNonNull(remoteTableName, "remoteTableName is null"); | ||
| this.columnNames = ImmutableList.copyOf(requireNonNull(columnNames, "columnNames is null")); | ||
| this.columnTypes = ImmutableList.copyOf(requireNonNull(columnTypes, "columnTypes is null")); | ||
| checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes must have the same size"); | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public RemoteTableName getRemoteTableName() | ||
| { | ||
| return remoteTableName; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public List<String> getColumnNames() | ||
| { | ||
| return columnNames; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public List<Type> getColumnTypes() | ||
| { | ||
| return columnTypes; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,16 +29,21 @@ | |
| import com.google.common.collect.ImmutableSet; | ||
| import com.google.common.collect.Streams; | ||
| import io.airlift.log.Logger; | ||
| import io.airlift.slice.Slice; | ||
| import io.trino.plugin.bigquery.BigQueryClient.RemoteDatabaseObject; | ||
| import io.trino.plugin.bigquery.ptf.Query.QueryHandle; | ||
| import io.trino.spi.TrinoException; | ||
| import io.trino.spi.connector.Assignment; | ||
| import io.trino.spi.connector.ColumnHandle; | ||
| import io.trino.spi.connector.ColumnMetadata; | ||
| import io.trino.spi.connector.ColumnSchema; | ||
| import io.trino.spi.connector.ConnectorInsertTableHandle; | ||
| import io.trino.spi.connector.ConnectorMetadata; | ||
| import io.trino.spi.connector.ConnectorOutputMetadata; | ||
| import io.trino.spi.connector.ConnectorOutputTableHandle; | ||
| import io.trino.spi.connector.ConnectorSession; | ||
| import io.trino.spi.connector.ConnectorTableHandle; | ||
| import io.trino.spi.connector.ConnectorTableLayout; | ||
| import io.trino.spi.connector.ConnectorTableMetadata; | ||
| import io.trino.spi.connector.ConnectorTableProperties; | ||
| import io.trino.spi.connector.ConnectorTableSchema; | ||
|
|
@@ -48,6 +53,7 @@ | |
| import io.trino.spi.connector.InMemoryRecordSet; | ||
| import io.trino.spi.connector.ProjectionApplicationResult; | ||
| import io.trino.spi.connector.RecordCursor; | ||
| import io.trino.spi.connector.RetryMode; | ||
| import io.trino.spi.connector.SchemaNotFoundException; | ||
| import io.trino.spi.connector.SchemaTableName; | ||
| import io.trino.spi.connector.SchemaTablePrefix; | ||
|
|
@@ -58,11 +64,13 @@ | |
| import io.trino.spi.predicate.TupleDomain; | ||
| import io.trino.spi.ptf.ConnectorTableFunctionHandle; | ||
| import io.trino.spi.security.TrinoPrincipal; | ||
| import io.trino.spi.statistics.ComputedStatistics; | ||
| import io.trino.spi.type.Type; | ||
| import io.trino.spi.type.VarcharType; | ||
|
|
||
| import javax.inject.Inject; | ||
|
|
||
| import java.util.Collection; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Optional; | ||
|
|
@@ -86,6 +94,7 @@ | |
| import static io.trino.plugin.bigquery.BigQueryTableHandle.getPartitionType; | ||
| import static io.trino.plugin.bigquery.BigQueryType.toField; | ||
| import static io.trino.plugin.bigquery.BigQueryUtil.isWildcardTable; | ||
| import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; | ||
| import static java.util.Locale.ENGLISH; | ||
| import static java.util.Objects.requireNonNull; | ||
| import static java.util.function.Function.identity; | ||
|
|
@@ -388,9 +397,7 @@ public void createSchema(ConnectorSession session, String schemaName, Map<String | |
| public void dropSchema(ConnectorSession session, String schemaName) | ||
| { | ||
| BigQueryClient client = bigQueryClientFactory.create(session); | ||
| String remoteSchemaName = client.toRemoteDataset(getProjectId(client), schemaName) | ||
| .map(RemoteDatabaseObject::getOnlyRemoteName) | ||
| .orElseThrow(() -> new SchemaNotFoundException(schemaName)); | ||
| String remoteSchemaName = getRemoteSchemaName(client, getProjectId(client), schemaName); | ||
| client.dropSchema(DatasetId.of(remoteSchemaName)); | ||
| } | ||
|
|
||
|
|
@@ -408,7 +415,13 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe | |
| } | ||
| } | ||
|
|
||
| private void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata) | ||
| @Override | ||
| public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorTableLayout> layout, RetryMode retryMode) | ||
| { | ||
| return createTable(session, tableMetadata); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this also need a defensive check to verify query retries are not enabled?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think so. Thanks for catching that. Do you want to send the PR?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
|
|
||
| private BigQueryOutputTableHandle createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata) | ||
| { | ||
| SchemaTableName schemaTableName = tableMetadata.getTable(); | ||
| String schemaName = schemaTableName.getSchemaName(); | ||
|
|
@@ -418,16 +431,34 @@ private void createTable(ConnectorSession session, ConnectorTableMetadata tableM | |
| throw new SchemaNotFoundException(schemaName); | ||
| } | ||
|
|
||
| List<Field> fields = tableMetadata.getColumns().stream() | ||
| .map(column -> toField(column.getName(), column.getType(), column.getComment())) | ||
| .collect(toImmutableList()); | ||
| int columnSize = tableMetadata.getColumns().size(); | ||
| ImmutableList.Builder<Field> fields = ImmutableList.builderWithExpectedSize(columnSize); | ||
| ImmutableList.Builder<String> columnsNames = ImmutableList.builderWithExpectedSize(columnSize); | ||
| ImmutableList.Builder<Type> columnsTypes = ImmutableList.builderWithExpectedSize(columnSize); | ||
| for (ColumnMetadata column : tableMetadata.getColumns()) { | ||
| fields.add(toField(column.getName(), column.getType(), column.getComment())); | ||
| columnsNames.add(column.getName()); | ||
| columnsTypes.add(column.getType()); | ||
| } | ||
|
|
||
| TableId tableId = TableId.of(schemaName, tableName); | ||
| TableDefinition tableDefinition = StandardTableDefinition.of(Schema.of(fields)); | ||
| BigQueryClient client = bigQueryClientFactory.create(session); | ||
| String projectId = getProjectId(client); | ||
| String remoteSchemaName = getRemoteSchemaName(client, projectId, schemaName); | ||
|
|
||
| TableId tableId = TableId.of(projectId, remoteSchemaName, tableName); | ||
| TableDefinition tableDefinition = StandardTableDefinition.of(Schema.of(fields.build())); | ||
| TableInfo.Builder tableInfo = TableInfo.newBuilder(tableId, tableDefinition); | ||
| tableMetadata.getComment().ifPresent(tableInfo::setDescription); | ||
|
|
||
| bigQueryClientFactory.create(session).createTable(tableInfo.build()); | ||
| client.createTable(tableInfo.build()); | ||
|
|
||
| return new BigQueryOutputTableHandle(new RemoteTableName(tableId), columnsNames.build(), columnsTypes.build()); | ||
| } | ||
|
|
||
// Completes CREATE TABLE AS SELECT. The table was already created in
// beginCreateTable and rows were written during execution, so there is
// nothing to commit here; fragments and statistics are ignored.
// NOTE(review): presumably the writes are already durable once the sink
// finishes — confirm against the page sink implementation.
@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    return Optional.empty();
}
|
|
||
| @Override | ||
|
|
@@ -442,6 +473,32 @@ public void dropTable(ConnectorSession session, ConnectorTableHandle tableHandle | |
| client.dropTable(tableId); | ||
| } | ||
|
|
||
| @Override | ||
| public ConnectorInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns, RetryMode retryMode) | ||
| { | ||
| if (retryMode != RetryMode.NO_RETRIES) { | ||
| throw new TrinoException(NOT_SUPPORTED, "This connector does not support query retries"); | ||
| } | ||
| BigQueryTableHandle table = (BigQueryTableHandle) tableHandle; | ||
| if (isWildcardTable(TableDefinition.Type.valueOf(table.asPlainTable().getType()), table.asPlainTable().getRemoteTableName().getTableName())) { | ||
| throw new TrinoException(BIGQUERY_UNSUPPORTED_OPERATION, "This connector does not support inserting into wildcard tables"); | ||
| } | ||
| ImmutableList.Builder<String> columnNames = ImmutableList.builderWithExpectedSize(columns.size()); | ||
| ImmutableList.Builder<Type> columnTypes = ImmutableList.builderWithExpectedSize(columns.size()); | ||
| for (ColumnHandle columnHandle : columns) { | ||
| BigQueryColumnHandle column = (BigQueryColumnHandle) columnHandle; | ||
| columnNames.add(column.getName()); | ||
| columnTypes.add(column.getTrinoType()); | ||
| } | ||
| return new BigQueryInsertTableHandle(table.asPlainTable().getRemoteTableName(), columnNames.build(), columnTypes.build()); | ||
| } | ||
|
|
||
// Completes an INSERT. Rows were written during execution, so no commit
// action is required; fragments and statistics are ignored and no output
// metadata is reported.
@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    return Optional.empty();
}
|
|
||
| @Override | ||
| public Optional<ProjectionApplicationResult<ConnectorTableHandle>> applyProjection( | ||
| ConnectorSession session, | ||
|
|
@@ -511,6 +568,13 @@ public Optional<TableFunctionApplicationResult<ConnectorTableHandle>> applyTable | |
| return Optional.of(new TableFunctionApplicationResult<>(tableHandle, columnHandles)); | ||
| } | ||
|
|
||
| private String getRemoteSchemaName(BigQueryClient client, String projectId, String datasetName) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably we should also call this in
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Turns out this is not needed/the wrong thing to do. When creating schemas we already list them and the listing will include the lowercase name so Trino will see a schema already exists - I've added a test to verify this similar to what you added for DROP SCHEMA in #13812 |
||
| { | ||
| return client.toRemoteDataset(projectId, datasetName) | ||
| .map(RemoteDatabaseObject::getOnlyRemoteName) | ||
| .orElseThrow(() -> new SchemaNotFoundException(datasetName)); | ||
| } | ||
|
|
||
| private static boolean containSameElements(Iterable<? extends ColumnHandle> first, Iterable<? extends ColumnHandle> second) | ||
| { | ||
| return ImmutableSet.copyOf(first).equals(ImmutableSet.copyOf(second)); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| /* | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package io.trino.plugin.bigquery; | ||
|
|
||
| import com.fasterxml.jackson.annotation.JsonCreator; | ||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import com.google.common.collect.ImmutableList; | ||
| import io.trino.spi.connector.ConnectorOutputTableHandle; | ||
| import io.trino.spi.type.Type; | ||
|
|
||
| import java.util.List; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkArgument; | ||
| import static java.util.Objects.requireNonNull; | ||
|
|
||
| public class BigQueryOutputTableHandle | ||
| implements ConnectorOutputTableHandle | ||
| { | ||
| private final RemoteTableName remoteTableName; | ||
| private final List<String> columnNames; | ||
| private final List<Type> columnTypes; | ||
|
|
||
| @JsonCreator | ||
| public BigQueryOutputTableHandle( | ||
| @JsonProperty("remoteTableName") RemoteTableName remoteTableName, | ||
| @JsonProperty("columnNames") List<String> columnNames, | ||
| @JsonProperty("columnTypes") List<Type> columnTypes) | ||
| { | ||
| this.remoteTableName = requireNonNull(remoteTableName, "remoteTableName is null"); | ||
| this.columnNames = ImmutableList.copyOf(requireNonNull(columnNames, "columnNames is null")); | ||
| this.columnTypes = ImmutableList.copyOf(requireNonNull(columnTypes, "columnTypes is null")); | ||
| checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes must have the same size"); | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public RemoteTableName getRemoteTableName() | ||
| { | ||
| return remoteTableName; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public List<String> getColumnNames() | ||
| { | ||
| return columnNames; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| public List<Type> getColumnTypes() | ||
| { | ||
| return columnTypes; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
😃