From 6680247660535d7973b2e5fa5c8f00bf8bed5a68 Mon Sep 17 00:00:00 2001 From: Grant Nicholas Date: Wed, 25 Jan 2023 11:57:37 -0600 Subject: [PATCH 1/5] Introduce SheetsConnectorTableHandle for google sheets connector --- .../sheets/SheetsConnectorTableHandle.java | 22 ++++++++++++ .../plugin/google/sheets/SheetsMetadata.java | 8 ++--- ...andle.java => SheetsNamedTableHandle.java} | 9 +++-- .../google/sheets/SheetsSplitManager.java | 2 +- .../TestSheetsConnectorTableHandle.java | 35 +++++++++++++++++++ 5 files changed, 66 insertions(+), 10 deletions(-) create mode 100644 plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java rename plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/{SheetsTableHandle.java => SheetsNamedTableHandle.java} (90%) create mode 100644 plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java new file mode 100644 index 000000000000..6dcaf5ffa851 --- /dev/null +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java @@ -0,0 +1,22 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.google.sheets; + +import io.trino.spi.connector.ConnectorTableHandle; + +public sealed interface SheetsConnectorTableHandle + extends ConnectorTableHandle + permits SheetsNamedTableHandle +{ +} diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java index 763070d3a053..71aea72c9ed2 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java @@ -61,7 +61,7 @@ public List listSchemaNames() } @Override - public SheetsTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) + public SheetsNamedTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) { requireNonNull(tableName, "tableName is null"); if (!listSchemaNames(session).contains(tableName.getSchemaName())) { @@ -73,13 +73,13 @@ public SheetsTableHandle getTableHandle(ConnectorSession session, SchemaTableNam return null; } - return new SheetsTableHandle(tableName.getSchemaName(), tableName.getTableName()); + return new SheetsNamedTableHandle(tableName.getSchemaName(), tableName.getTableName()); } @Override public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { - SheetsTableHandle tableHandle = (SheetsTableHandle) table; + SheetsNamedTableHandle tableHandle = (SheetsNamedTableHandle) table; return getTableMetadata(tableHandle.toSchemaTableName()) .orElseThrow(() -> new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Metadata not found for table " + tableHandle.getTableName())); } @@ -87,7 +87,7 @@ public ConnectorTableMetadata getTableMetadata(ConnectorSession session, Connect @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { - SheetsTableHandle sheetsTableHandle = (SheetsTableHandle) tableHandle; + SheetsNamedTableHandle sheetsTableHandle = (SheetsNamedTableHandle) tableHandle; SheetsTable table = sheetsClient.getTable(sheetsTableHandle.getTableName()) .orElseThrow(() -> new TableNotFoundException(sheetsTableHandle.toSchemaTableName())); diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsTableHandle.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsNamedTableHandle.java similarity index 90% rename from plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsTableHandle.java rename to plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsNamedTableHandle.java index acaeaa6ac402..752858827be6 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsTableHandle.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsNamedTableHandle.java @@ -15,20 +15,19 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import io.trino.spi.connector.ConnectorTableHandle; import io.trino.spi.connector.SchemaTableName; import java.util.Objects; import static java.util.Objects.requireNonNull; -public final class SheetsTableHandle - implements ConnectorTableHandle +public final class SheetsNamedTableHandle + implements SheetsConnectorTableHandle { private final SchemaTableName schemaTableName; @JsonCreator - public SheetsTableHandle( + public SheetsNamedTableHandle( @JsonProperty("schemaName") String schemaName, @JsonProperty("tableName") String tableName) { @@ -70,7 +69,7 @@ public boolean equals(Object obj) return false; } - SheetsTableHandle other = (SheetsTableHandle) obj; + SheetsNamedTableHandle other = (SheetsNamedTableHandle) obj; return Objects.equals(this.schemaTableName, other.schemaTableName); } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java index 48d956ca4b32..c3d215c57594 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java @@ -51,7 +51,7 @@ public ConnectorSplitSource getSplits( DynamicFilter dynamicFilter, Constraint constraint) { - SheetsTableHandle tableHandle = (SheetsTableHandle) connectorTableHandle; + SheetsNamedTableHandle tableHandle = (SheetsNamedTableHandle) connectorTableHandle; SheetsTable table = sheetsClient.getTable(tableHandle.getTableName()) // this can happen if table is removed during a query .orElseThrow(() -> new TableNotFoundException(tableHandle.toSchemaTableName())); diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java new file mode 100644 index 000000000000..bcb7d0828062 --- /dev/null +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java @@ -0,0 +1,35 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.google.sheets; + +import io.airlift.json.JsonCodec; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + +public class TestSheetsConnectorTableHandle +{ + private final JsonCodec namedCodec = JsonCodec.jsonCodec(SheetsNamedTableHandle.class); + + @Test + public void testRoundTripWithNamedTable() + { + SheetsNamedTableHandle expected = new SheetsNamedTableHandle("schema", "table"); + + String json = namedCodec.toJson(expected); + SheetsNamedTableHandle actual = namedCodec.fromJson(json); + + assertEquals(actual, expected); + } +} From 6c79554608a2886ac5ac9e2acbd69430ff8730a7 Mon Sep 17 00:00:00 2001 From: Grant Nicholas Date: Thu, 2 Feb 2023 13:32:31 -0600 Subject: [PATCH 2/5] Remove unused table name from SheetsTable --- .../main/java/io/trino/plugin/google/sheets/SheetsClient.java | 2 +- .../main/java/io/trino/plugin/google/sheets/SheetsTable.java | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java index c87d170590b6..7f616d2afebe 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java @@ -134,7 +134,7 @@ public Optional getTable(String tableName) columns.add(new SheetsColumn(columnValue, VarcharType.VARCHAR)); } List> dataValues = values.subList(1, values.size()); // removing header info - return Optional.of(new SheetsTable(tableName, columns.build(), dataValues)); + return Optional.of(new SheetsTable(columns.build(), dataValues)); } return Optional.empty(); } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsTable.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsTable.java index 156ab453ca55..ac8bdc138c40 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsTable.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsTable.java @@ -20,8 +20,6 @@ import java.util.List; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Strings.isNullOrEmpty; import static java.util.Objects.requireNonNull; public class SheetsTable @@ -31,11 +29,9 @@ public class SheetsTable @JsonCreator public SheetsTable( - @JsonProperty("name") String name, @JsonProperty("columns") List columns, @JsonProperty("values") List> values) { - checkArgument(!isNullOrEmpty(name), "name is null or is empty"); requireNonNull(columns, "columns is null"); ImmutableList.Builder columnsMetadata = ImmutableList.builder(); From 3304931b8455cd5de60ccd39285a61ccec2bea6f Mon Sep 17 00:00:00 2001 From: Grant Nicholas Date: Wed, 25 Jan 2023 13:58:02 -0600 Subject: [PATCH 3/5] Support querying google sheets using the 'sheet' table function --- .../main/sphinx/connector/googlesheets.rst | 42 ++++- plugin/trino-google-sheets/pom.xml | 6 + .../google/sheets/SheetNotFoundException.java | 27 ++++ .../plugin/google/sheets/SheetsClient.java | 46 +++++- .../plugin/google/sheets/SheetsConnector.java | 15 +- .../sheets/SheetsConnectorTableHandle.java | 15 +- .../plugin/google/sheets/SheetsMetadata.java | 50 +++++- .../plugin/google/sheets/SheetsModule.java | 5 + .../google/sheets/SheetsNamedTableHandle.java | 5 - .../google/sheets/SheetsSheetTableHandle.java | 83 ++++++++++ .../plugin/google/sheets/SheetsSplit.java | 39 +++-- .../google/sheets/SheetsSplitManager.java | 32 +++- .../trino/plugin/google/sheets/ptf/Sheet.java | 152 ++++++++++++++++++ .../google/sheets/TestGoogleSheets.java | 110 +++++++++++++ .../TestSheetsConnectorTableHandle.java | 12 ++ .../google/sheets/TestSheetsPlugin.java | 1 + 16 files changed, 597 insertions(+), 43 deletions(-) create mode 100644 plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetNotFoundException.java create mode 100644 plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSheetTableHandle.java create mode 100644 plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/ptf/Sheet.java diff --git a/docs/src/main/sphinx/connector/googlesheets.rst b/docs/src/main/sphinx/connector/googlesheets.rst index 71f1c870e7f1..96554d9b8f5f 100644 --- a/docs/src/main/sphinx/connector/googlesheets.rst +++ b/docs/src/main/sphinx/connector/googlesheets.rst @@ -68,8 +68,8 @@ containing the following columns in this order: * Table Name * Sheet ID -* Owner -* Notes +* Owner (optional) +* Notes (optional) See this `example sheet `_ as a reference. @@ -89,10 +89,14 @@ address of the service account. The sheet needs to be mapped to a Trino table name. Specify a table name (column A) and the sheet ID (column B) in the metadata sheet. To refer -to a specific tab in the sheet, add the tab name after the sheet ID, separated -with ``#``. If tab name is not provided, connector loads only 10,000 rows by default from +to a specific range in the sheet, add the range after the sheet ID, separated +with ``#``. If a range is not provided, the connector loads only 10,000 rows by default from the first tab in the sheet. +The first row of the provided sheet range is used as the header and will determine the column +names of the Trino table. +For more details on sheet range syntax see the `google sheets docs `_. + API usage limits ---------------- @@ -133,3 +137,33 @@ SQL support The connector provides :ref:`globally available ` and :ref:`read operation ` statements to access data and metadata in Google Sheets. + +Table functions +--------------- + +The connector provides specific :doc:`table functions ` to +access Google Sheets. + +.. _google-sheets-sheet-function: + +``sheet(id, range) -> table`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``sheet`` function allows you to query a Google Sheet directly without +specifying it as a named table in the metadata sheet. + +For example, for a catalog named 'example':: + + SELECT * + FROM + TABLE(example.system.sheet( + id => 'googleSheetIdHere')); + +A sheet range or named range can be provided as an optional ``range`` argument. +The default sheet range is ``$1:$10000`` if one is not provided:: + + SELECT * + FROM + TABLE(example.system.sheet( + id => 'googleSheetIdHere', + range => 'TabName!A1:B4')); diff --git a/plugin/trino-google-sheets/pom.xml b/plugin/trino-google-sheets/pom.xml index e2ed6e6cdc3c..a91806c94ad3 100644 --- a/plugin/trino-google-sheets/pom.xml +++ b/plugin/trino-google-sheets/pom.xml @@ -183,6 +183,12 @@ test + + org.assertj + assertj-core + test + + org.eclipse.jetty.toolchain jetty-servlet-api diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetNotFoundException.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetNotFoundException.java new file mode 100644 index 000000000000..8a6d735d5022 --- /dev/null +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetNotFoundException.java @@ -0,0 +1,27 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.google.sheets; + +import io.trino.spi.connector.NotFoundException; + +import static java.lang.String.format; + +public class SheetNotFoundException + extends NotFoundException +{ + public SheetNotFoundException(String sheetExpression) + { + super(format("Sheet '%s' not found", sheetExpression)); + } +} diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java index 7f616d2afebe..553b92ec1e7c 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java @@ -63,6 +63,8 @@ public class SheetsClient { + public static final String DEFAULT_RANGE = "$1:$10000"; + public static final String RANGE_SEPARATOR = "#"; private static final Logger log = Logger.get(SheetsClient.class); private static final String APPLICATION_NAME = "trino google sheets integration"; @@ -115,14 +117,31 @@ public Map> loadAll(Iterable tableLis CacheLoader.from(this::readAllValuesFromSheetExpression)); } + public Optional getTable(SheetsConnectorTableHandle tableHandle) + { + if (tableHandle instanceof SheetsNamedTableHandle namedTableHandle) { + return getTable(namedTableHandle.getTableName()); + } + if (tableHandle instanceof SheetsSheetTableHandle sheetTableHandle) { + return getTableFromValues(readAllValuesFromSheet(sheetTableHandle.getSheetExpression())); + } + throw new IllegalStateException("Found unexpected table handle type " + tableHandle); + } + public Optional getTable(String tableName) { - List> values = convertToStringValues(readAllValues(tableName)); - if (values.size() > 0) { + List> values = readAllValues(tableName); + return getTableFromValues(values); + } + + public Optional getTableFromValues(List> values) + { + List> stringValues = convertToStringValues(values); + if (stringValues.size() > 0) { ImmutableList.Builder columns = ImmutableList.builder(); Set columnNames = new HashSet<>(); // Assuming 1st line is always header - List header = values.get(0); + List header = stringValues.get(0); int count = 0; for (String column : header) { String columnValue = column.toLowerCase(ENGLISH); @@ -133,7 +152,7 @@ public Optional getTable(String tableName) columnNames.add(columnValue); columns.add(new SheetsColumn(columnValue, VarcharType.VARCHAR)); } - List> dataValues = values.subList(1, values.size()); // removing header info + List> dataValues = stringValues.subList(1, values.size()); // removing header info return Optional.of(new SheetsTable(columns.build(), dataValues)); } return Optional.empty(); @@ -162,7 +181,7 @@ public List> readAllValues(String tableName) try { String sheetExpression = tableSheetMappingCache.getUnchecked(tableName) .orElseThrow(() -> new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Sheet expression not found for table " + tableName)); - return sheetDataCache.getUnchecked(sheetExpression); + return readAllValuesFromSheet(sheetExpression); } catch (UncheckedExecutionException e) { throwIfInstanceOf(e.getCause(), TrinoException.class); @@ -170,6 +189,17 @@ public List> readAllValues(String tableName) } } + public List> readAllValuesFromSheet(String sheetExpression) + { + try { + return sheetDataCache.getUnchecked(sheetExpression); + } + catch (UncheckedExecutionException e) { + throwIfInstanceOf(e.getCause(), TrinoException.class); + throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "Error loading data for sheet: " + sheetExpression, e); + } + } + public static List> convertToStringValues(List> values) { return values.stream() @@ -235,8 +265,8 @@ private List> readAllValuesFromSheetExpression(String sheetExpressi { try { // by default loading up to 10k rows from the first tab of the sheet - String defaultRange = "$1:$10000"; - String[] tableOptions = sheetExpression.split("#"); + String defaultRange = DEFAULT_RANGE; + String[] tableOptions = sheetExpression.split(RANGE_SEPARATOR); String sheetId = tableOptions[0]; if (tableOptions.length > 1) { defaultRange = tableOptions[1]; @@ -245,6 +275,8 @@ private List> readAllValuesFromSheetExpression(String sheetExpressi return sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues(); } catch (IOException e) { + // TODO: improve error to a {Table|Sheet}NotFoundException + // is a backwards incompatible error code change from SHEETS_UNKNOWN_TABLE_ERROR -> NOT_FOUND throw new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Failed reading data from sheet: " + sheetExpression, e); } } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnector.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnector.java index d3d0c456a750..166d0f426408 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnector.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnector.java @@ -13,6 +13,7 @@ */ package io.trino.plugin.google.sheets; +import com.google.common.collect.ImmutableSet; import io.airlift.bootstrap.LifeCycleManager; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorMetadata; @@ -20,10 +21,13 @@ import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.ConnectorSplitManager; import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.ptf.ConnectorTableFunction; import io.trino.spi.transaction.IsolationLevel; import javax.inject.Inject; +import java.util.Set; + import static io.trino.plugin.google.sheets.SheetsTransactionHandle.INSTANCE; import static java.util.Objects.requireNonNull; @@ -34,18 +38,21 @@ public class SheetsConnector private final SheetsMetadata metadata; private final SheetsSplitManager splitManager; private final SheetsRecordSetProvider recordSetProvider; + private final Set connectorTableFunctions; @Inject public SheetsConnector( LifeCycleManager lifeCycleManager, SheetsMetadata metadata, SheetsSplitManager splitManager, - SheetsRecordSetProvider recordSetProvider) + SheetsRecordSetProvider recordSetProvider, + Set connectorTableFunctions) { this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null"); this.metadata = requireNonNull(metadata, "metadata is null"); this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null"); + this.connectorTableFunctions = ImmutableSet.copyOf(requireNonNull(connectorTableFunctions, "connectorTableFunctions is null")); } @Override @@ -72,6 +79,12 @@ public ConnectorRecordSetProvider getRecordSetProvider() return recordSetProvider; } + @Override + public Set getTableFunctions() + { + return connectorTableFunctions; + } + @Override public final void shutdown() { diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java index 6dcaf5ffa851..dded950d5b0b 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConnectorTableHandle.java @@ -14,9 +14,22 @@ package io.trino.plugin.google.sheets; import io.trino.spi.connector.ConnectorTableHandle; +import io.trino.spi.connector.NotFoundException; +import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.TableNotFoundException; public sealed interface SheetsConnectorTableHandle extends ConnectorTableHandle - permits SheetsNamedTableHandle + permits SheetsNamedTableHandle, SheetsSheetTableHandle { + static NotFoundException tableNotFound(SheetsConnectorTableHandle tableHandle) + { + if (tableHandle instanceof SheetsNamedTableHandle sheetsNamedTableHandle) { + return new TableNotFoundException(new SchemaTableName(sheetsNamedTableHandle.getSchemaName(), sheetsNamedTableHandle.getTableName())); + } + if (tableHandle instanceof SheetsSheetTableHandle sheetsSheetTableHandle) { + return new SheetNotFoundException(sheetsSheetTableHandle.getSheetExpression()); + } + throw new IllegalStateException("Found unexpected table handle type " + tableHandle); + } } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java index 71aea72c9ed2..eb7fce10e248 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsMetadata.java @@ -18,13 +18,16 @@ import io.trino.spi.TrinoException; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.ColumnMetadata; +import io.trino.spi.connector.ColumnSchema; import io.trino.spi.connector.ConnectorMetadata; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.ConnectorTableHandle; import io.trino.spi.connector.ConnectorTableMetadata; +import io.trino.spi.connector.ConnectorTableSchema; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.connector.SchemaTablePrefix; -import io.trino.spi.connector.TableNotFoundException; +import io.trino.spi.connector.TableFunctionApplicationResult; +import io.trino.spi.ptf.ConnectorTableFunctionHandle; import javax.inject.Inject; @@ -34,7 +37,9 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.plugin.google.sheets.SheetsConnectorTableHandle.tableNotFound; import static io.trino.plugin.google.sheets.SheetsErrorCode.SHEETS_UNKNOWN_TABLE_ERROR; +import static io.trino.plugin.google.sheets.ptf.Sheet.SheetFunctionHandle; import static java.util.Objects.requireNonNull; public class SheetsMetadata @@ -79,17 +84,18 @@ public SheetsNamedTableHandle getTableHandle(ConnectorSession session, SchemaTab @Override public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { - SheetsNamedTableHandle tableHandle = (SheetsNamedTableHandle) table; - return getTableMetadata(tableHandle.toSchemaTableName()) - .orElseThrow(() -> new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Metadata not found for table " + tableHandle.getTableName())); + SheetsConnectorTableHandle tableHandle = (SheetsConnectorTableHandle) table; + SheetsTable sheetsTable = sheetsClient.getTable(tableHandle) + .orElseThrow(() -> new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Metadata not found for table " + tableNotFound(tableHandle))); + return new ConnectorTableMetadata(getSchemaTableName(tableHandle), sheetsTable.getColumnsMetadata()); } @Override public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { - SheetsNamedTableHandle sheetsTableHandle = (SheetsNamedTableHandle) tableHandle; - SheetsTable table = sheetsClient.getTable(sheetsTableHandle.getTableName()) - .orElseThrow(() -> new TableNotFoundException(sheetsTableHandle.toSchemaTableName())); + SheetsConnectorTableHandle sheetsTableHandle = (SheetsConnectorTableHandle) tableHandle; + SheetsTable table = sheetsClient.getTable(sheetsTableHandle) + .orElseThrow(() -> tableNotFound(sheetsTableHandle)); ImmutableMap.Builder columnHandles = ImmutableMap.builder(); int index = 0; @@ -145,4 +151,34 @@ public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTable { return ((SheetsColumnHandle) columnHandle).getColumnMetadata(); } + + @Override + public Optional> applyTableFunction(ConnectorSession session, ConnectorTableFunctionHandle handle) + { + if (!(handle instanceof SheetFunctionHandle)) { + return Optional.empty(); + } + + ConnectorTableHandle tableHandle = ((SheetFunctionHandle) handle).getTableHandle(); + ConnectorTableSchema tableSchema = getTableSchema(session, tableHandle); + Map columnHandlesByName = getColumnHandles(session, tableHandle); + List columnHandles = tableSchema.getColumns().stream() + .map(ColumnSchema::getName) + .map(columnHandlesByName::get) + .collect(toImmutableList()); + + return Optional.of(new TableFunctionApplicationResult<>(tableHandle, columnHandles)); + } + + private static SchemaTableName getSchemaTableName(SheetsConnectorTableHandle handle) + { + if (handle instanceof SheetsNamedTableHandle namedTableHandle) { + return new SchemaTableName(namedTableHandle.getSchemaName(), namedTableHandle.getTableName()); + } + if (handle instanceof SheetsSheetTableHandle) { + // TODO (https://github.com/trinodb/trino/issues/6694) SchemaTableName should not be required for synthetic ConnectorTableHandle + return new SchemaTableName("_generated", "_generated"); + } + throw new IllegalStateException("Found unexpected table handle type " + handle); + } } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsModule.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsModule.java index 516fac85b7cf..3353ed2ecf9f 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsModule.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsModule.java @@ -16,7 +16,10 @@ import com.google.inject.Binder; import com.google.inject.Module; import com.google.inject.Scopes; +import io.trino.plugin.google.sheets.ptf.Sheet; +import io.trino.spi.ptf.ConnectorTableFunction; +import static com.google.inject.multibindings.Multibinder.newSetBinder; import static io.airlift.configuration.ConfigBinder.configBinder; import static io.airlift.json.JsonCodec.listJsonCodec; import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; @@ -36,5 +39,7 @@ public void configure(Binder binder) configBinder(binder).bindConfig(SheetsConfig.class); jsonCodecBinder(binder).bindMapJsonCodec(String.class, listJsonCodec(SheetsTable.class)); + + newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(Sheet.class).in(Scopes.SINGLETON); } } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsNamedTableHandle.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsNamedTableHandle.java index 752858827be6..ea015b98c0f6 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsNamedTableHandle.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsNamedTableHandle.java @@ -48,11 +48,6 @@ public String getTableName() return schemaTableName.getTableName(); } - public SchemaTableName toSchemaTableName() - { - return schemaTableName; - } - @Override public int hashCode() { diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSheetTableHandle.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSheetTableHandle.java new file mode 100644 index 000000000000..45c580a642f9 --- /dev/null +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSheetTableHandle.java @@ -0,0 +1,83 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.google.sheets; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; + +import static io.trino.plugin.google.sheets.SheetsClient.RANGE_SEPARATOR; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public final class SheetsSheetTableHandle + implements SheetsConnectorTableHandle +{ + private final String sheetId; + private final String sheetRange; + + @JsonCreator + public SheetsSheetTableHandle( + @JsonProperty("sheetId") String sheetId, + @JsonProperty("sheetRange") String sheetRange) + { + this.sheetId = requireNonNull(sheetId, "sheetId is null"); + this.sheetRange = requireNonNull(sheetRange, "sheetRange is null"); + } + + @JsonProperty + public String getSheetId() + { + return sheetId; + } + + @JsonProperty + public String getSheetRange() + { + return sheetRange; + } + + @JsonIgnore + public String getSheetExpression() + { + return "%s%s%s".formatted(sheetId, RANGE_SEPARATOR, sheetRange); + } + + @Override + public String toString() + { + return format("Sheet[sheetId=%s, sheetRange=%s]", sheetId, sheetRange); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SheetsSheetTableHandle that = (SheetsSheetTableHandle) o; + return sheetId.equals(that.sheetId) && sheetRange.equals(that.sheetRange); + } + + @Override + public int hashCode() + { + return Objects.hash(sheetId, sheetRange); + } +} diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplit.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplit.java index 021cce90d072..09a4db35c686 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplit.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplit.java @@ -22,9 +22,11 @@ import io.trino.spi.connector.ConnectorSplit; import java.util.List; +import java.util.Optional; import static io.airlift.slice.SizeOf.estimatedSizeOf; import static io.airlift.slice.SizeOf.instanceSize; +import static io.airlift.slice.SizeOf.sizeOf; import static java.util.Objects.requireNonNull; public class SheetsSplit @@ -32,35 +34,44 @@ public class SheetsSplit { private static final int INSTANCE_SIZE = instanceSize(SheetsSplit.class); - private final String schemaName; - private final String tableName; + private final Optional schemaName; + private final Optional tableName; + private final Optional sheetExpression; private final List> values; private final List hostAddresses; @JsonCreator public SheetsSplit( - @JsonProperty("schemaName") String schemaName, - @JsonProperty("tableName") String tableName, + @JsonProperty("schemaName") Optional schemaName, + @JsonProperty("tableName") Optional tableName, + @JsonProperty("sheetExpression") Optional sheetExpression, @JsonProperty("values") List> values) { this.schemaName = requireNonNull(schemaName, "schemaName is null"); this.tableName = requireNonNull(tableName, "tableName is null"); + this.sheetExpression = requireNonNull(sheetExpression, "sheetExpression is null"); this.values = requireNonNull(values, "values is null"); this.hostAddresses = ImmutableList.of(); } @JsonProperty - public String getSchemaName() + public Optional getSchemaName() { return schemaName; } @JsonProperty - public String getTableName() + public Optional getTableName() { return tableName; } + @JsonProperty + public Optional getSheetExpression() + { + return sheetExpression; + } + @JsonProperty public List> getValues() { @@ -82,19 +93,21 @@ public List getAddresses() @Override public Object getInfo() { - return ImmutableMap.builder() - .put("schemaName", schemaName) - .put("tableName", tableName) - .put("hostAddresses", hostAddresses) - .buildOrThrow(); + ImmutableMap.Builder builder = ImmutableMap.builder() + .put("hostAddresses", hostAddresses); + schemaName.ifPresent(name -> builder.put("schemaName", name)); + tableName.ifPresent(name -> builder.put("tableName", name)); + sheetExpression.ifPresent(expression -> builder.put("sheetExpression", expression)); + return builder.buildOrThrow(); } @Override public long getRetainedSizeInBytes() { return INSTANCE_SIZE - + estimatedSizeOf(schemaName) - + estimatedSizeOf(tableName) + + sizeOf(schemaName, SizeOf::estimatedSizeOf) + + sizeOf(tableName, SizeOf::estimatedSizeOf) + + sizeOf(sheetExpression, SizeOf::estimatedSizeOf) + estimatedSizeOf(values, value -> estimatedSizeOf(value, SizeOf::estimatedSizeOf)) + estimatedSizeOf(hostAddresses, HostAddress::getRetainedSizeInBytes); } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java index c3d215c57594..e9370e9523ef 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsSplitManager.java @@ -22,14 +22,15 @@ import io.trino.spi.connector.Constraint; import io.trino.spi.connector.DynamicFilter; import io.trino.spi.connector.FixedSplitSource; -import io.trino.spi.connector.TableNotFoundException; import javax.inject.Inject; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; +import static io.trino.plugin.google.sheets.SheetsConnectorTableHandle.tableNotFound; import static java.util.Objects.requireNonNull; public class SheetsSplitManager @@ -51,14 +52,35 @@ public ConnectorSplitSource getSplits( DynamicFilter dynamicFilter, Constraint constraint) { - SheetsNamedTableHandle tableHandle = (SheetsNamedTableHandle) connectorTableHandle; - SheetsTable table = sheetsClient.getTable(tableHandle.getTableName()) + SheetsConnectorTableHandle tableHandle = (SheetsConnectorTableHandle) connectorTableHandle; + SheetsTable table = sheetsClient.getTable(tableHandle) // this can happen if table is removed during a query - .orElseThrow(() -> new TableNotFoundException(tableHandle.toSchemaTableName())); + .orElseThrow(() -> tableNotFound(tableHandle)); List splits = new ArrayList<>(); - splits.add(new SheetsSplit(tableHandle.getSchemaName(), tableHandle.getTableName(), table.getValues())); + splits.add(sheetsSplitFromTableHandle(tableHandle, table.getValues())); Collections.shuffle(splits); return new FixedSplitSource(splits); } + + private static SheetsSplit sheetsSplitFromTableHandle( + SheetsConnectorTableHandle tableHandle, + List> values) + { + if (tableHandle instanceof SheetsNamedTableHandle namedTableHandle) { + return new SheetsSplit( + Optional.of(namedTableHandle.getSchemaName()), + Optional.of(namedTableHandle.getTableName()), + Optional.empty(), + values); + } + if (tableHandle instanceof SheetsSheetTableHandle sheetTableHandle) { + return new SheetsSplit( + Optional.empty(), + Optional.empty(), + Optional.of(sheetTableHandle.getSheetExpression()), + values); + } + throw new IllegalStateException("Found unexpected table handle type " + tableHandle); + } } diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/ptf/Sheet.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/ptf/Sheet.java new file mode 100644 index 000000000000..1fa098db8060 --- /dev/null +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/ptf/Sheet.java @@ -0,0 +1,152 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.google.sheets.ptf; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import com.google.inject.Inject; +import io.airlift.slice.Slice; +import io.trino.plugin.google.sheets.SheetsClient; +import io.trino.plugin.google.sheets.SheetsColumnHandle; +import io.trino.plugin.google.sheets.SheetsConnectorTableHandle; +import io.trino.plugin.google.sheets.SheetsMetadata; +import io.trino.plugin.google.sheets.SheetsSheetTableHandle; +import io.trino.spi.TrinoException; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.ConnectorTableHandle; +import io.trino.spi.connector.ConnectorTransactionHandle; +import io.trino.spi.ptf.AbstractConnectorTableFunction; +import io.trino.spi.ptf.Argument; +import io.trino.spi.ptf.ConnectorTableFunction; +import io.trino.spi.ptf.ConnectorTableFunctionHandle; +import io.trino.spi.ptf.Descriptor; +import io.trino.spi.ptf.ScalarArgument; +import io.trino.spi.ptf.ScalarArgumentSpecification; +import io.trino.spi.ptf.TableFunctionAnalysis; + +import javax.inject.Provider; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.plugin.google.sheets.SheetsClient.DEFAULT_RANGE; +import static io.trino.plugin.google.sheets.SheetsClient.RANGE_SEPARATOR; +import static io.trino.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static io.trino.spi.ptf.ReturnTypeSpecification.GenericTable.GENERIC_TABLE; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static java.util.Objects.requireNonNull; + +public class Sheet + implements Provider +{ + public static final String SCHEMA_NAME = "system"; + public static final String NAME = "sheet"; + public static final String ID_ARGUMENT = "ID"; + public static final String RANGE_ARGUMENT = "RANGE"; + + private final SheetsMetadata metadata; + + @Inject + public Sheet(SheetsClient client) + { + this.metadata = new SheetsMetadata(requireNonNull(client, "client is null")); + } + + @Override + public ConnectorTableFunction get() + { + return new SheetFunction(metadata); + } + + public static class SheetFunction + extends AbstractConnectorTableFunction + { + private final SheetsMetadata metadata; + + public SheetFunction(SheetsMetadata metadata) + { + super( + SCHEMA_NAME, + NAME, + ImmutableList.of( + ScalarArgumentSpecification.builder() + .name(ID_ARGUMENT) + .type(VARCHAR) + .build(), + ScalarArgumentSpecification.builder() + .name(RANGE_ARGUMENT) + .type(VARCHAR) + .defaultValue(utf8Slice(DEFAULT_RANGE)) + .build()), + GENERIC_TABLE); + this.metadata = requireNonNull(metadata, "metadata is null"); + } + + @Override + public TableFunctionAnalysis analyze(ConnectorSession session, ConnectorTransactionHandle transaction, Map arguments) + { + String sheetId = ((Slice) ((ScalarArgument) arguments.get(ID_ARGUMENT)).getValue()).toStringUtf8(); + validateSheetId(sheetId); + String rangeArgument = ((Slice) ((ScalarArgument) arguments.get(RANGE_ARGUMENT)).getValue()).toStringUtf8(); + + SheetsConnectorTableHandle tableHandle = new SheetsSheetTableHandle(sheetId, rangeArgument); + SheetFunctionHandle handle = new SheetFunctionHandle(tableHandle); + + List fields = metadata.getColumnHandles(session, tableHandle).entrySet().stream() + .map(entry -> new Descriptor.Field( + entry.getKey(), + Optional.of(((SheetsColumnHandle) (entry.getValue())).getColumnType()))) + .collect(toImmutableList()); + + Descriptor returnedType = new Descriptor(fields); + + return TableFunctionAnalysis.builder() + .returnedType(returnedType) + .handle(handle) + .build(); + } + } + + private static void validateSheetId(String sheetId) + { + // Sheet ids cannot contain "#", if a "#" is present it is because a range is present + // Ranges should be provided through the range argument + // https://developers.google.com/sheets/api/guides/concepts + if (sheetId.contains(RANGE_SEPARATOR)) { + throw new TrinoException(INVALID_FUNCTION_ARGUMENT, "Google sheet ID %s cannot contain '#'. Provide a range through the 'range' argument.".formatted(sheetId)); + } + } + + public static class SheetFunctionHandle + implements ConnectorTableFunctionHandle + { + private final SheetsConnectorTableHandle tableHandle; + + @JsonCreator + public SheetFunctionHandle(@JsonProperty("tableHandle") SheetsConnectorTableHandle tableHandle) + { + this.tableHandle = requireNonNull(tableHandle, "tableHandle is null"); + } + + @JsonProperty + public ConnectorTableHandle getTableHandle() + { + return tableHandle; + } + } +} diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java index 2eb742e46d29..61786a99b513 100644 --- a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java @@ -19,6 +19,8 @@ import org.testng.annotations.Test; import static io.trino.plugin.google.sheets.SheetsQueryRunner.createSheetsQueryRunner; +import static io.trino.plugin.google.sheets.TestSheetsPlugin.DATA_SHEET_ID; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.testng.Assert.assertEquals; public class TestGoogleSheets @@ -78,4 +80,112 @@ public void testTableWithRepeatedAndMissingColumnNames() assertQuery("desc table_with_duplicate_and_missing_column_names", "SELECT * FROM (VALUES('a','varchar','','')," + " ('column_1','varchar','',''), ('column_2','varchar','',''), ('c','varchar','',''))"); } + + @Test + public void testSheetQuerySimple() + { + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s'))".formatted(DATA_SHEET_ID), + "VALUES " + + "('1', 'one')," + + "('2', 'two')," + + "('3', 'three')," + + "('4', 'four')," + + "('5', 'five')"); + } + + @Test + public void testSheetQueryFilter() + { + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s'))".formatted(DATA_SHEET_ID) + + "WHERE number = '1' and text = 'one'", + "VALUES " + + "('1', 'one')"); + } + + @Test + public void testSheetQueryWithSheet() + { + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s', range => '%s'))".formatted(DATA_SHEET_ID, "number_text"), + "VALUES " + + "('1', 'one')," + + "('2', 'two')," + + "('3', 'three')," + + "('4', 'four')," + + "('5', 'five')"); + } + + @Test + public void testSheetQueryWithSheetAndRangeWithoutHeader() + { + // The range skips the header row, the first row of the range is treated as a header + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s', range => '%s'))".formatted(DATA_SHEET_ID, "number_text!A2:B6") + + "WHERE \"1\" = \"1\" and \"one\" = \"one\"", + "VALUES " + + "('2', 'two')," + + "('3', 'three')," + + "('4', 'four')," + + "('5', 'five')"); + } + + @Test + public void testSheetQueryWithSheetAndRowRange() + { + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s', range => '%s'))".formatted(DATA_SHEET_ID, "number_text!A1:B4") + + "WHERE number = number and text = text", + "VALUES " + + "('1', 'one')," + + "('2', 'two')," + + "('3', 'three')"); + } + + @Test + public void testSheetQueryWithSheetAndColumnRange() + { + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s', range => '%s'))".formatted(DATA_SHEET_ID, "number_text!A1:A6") + + "WHERE number = number", + "VALUES " + + "('1')," + + "('2')," + + "('3')," + + "('4')," + + "('5')"); + } + + @Test + public void testSheetQueryWithSheetAndRowAndColumnRange() + { + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s', range => '%s'))".formatted(DATA_SHEET_ID, "number_text!B3:B5") + + "WHERE \"two\" = \"two\"", + "VALUES " + + "('three')," + + "('four')"); + } + + @Test + public void testSheetQueryWithSheetRangeInIdFails() + { + // Sheet ids with "#" are explicitly forbidden since "#" is the sheet separator + assertThatThrownBy(() -> query( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s#%s'))".formatted(DATA_SHEET_ID, "number_text"))) + .hasMessageContaining("Google sheet ID %s cannot contain '#'. Provide a range through the 'range' argument.".formatted(DATA_SHEET_ID + "#number_text")); + + // Attempting to put a sheet range in the id fails since the sheet id is invalid + assertThatThrownBy(() -> query( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s%s'))".formatted(DATA_SHEET_ID, "number_text"))) + .hasMessageContaining("Failed reading data from sheet: %snumber_text#$1:$10000".formatted(DATA_SHEET_ID)); + } + + @Test + public void testSheetQueryWithInvalidSheetId() + { + assertThatThrownBy(() -> query("SELECT * FROM TABLE(gsheets.system.sheet(id => 'DOESNOTEXIST'))")) + .hasMessageContaining("Failed reading data from sheet: DOESNOTEXIST"); + } } diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java index bcb7d0828062..bded6bdf5139 100644 --- a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConnectorTableHandle.java @@ -21,6 +21,7 @@ public class TestSheetsConnectorTableHandle { private final JsonCodec namedCodec = JsonCodec.jsonCodec(SheetsNamedTableHandle.class); + private final JsonCodec sheetCodec = JsonCodec.jsonCodec(SheetsSheetTableHandle.class); @Test public void testRoundTripWithNamedTable() @@ -32,4 +33,15 @@ public void testRoundTripWithNamedTable() assertEquals(actual, expected); } + + @Test + public void testRoundTripWithSheetTable() + { + SheetsSheetTableHandle expected = new SheetsSheetTableHandle("sheetID", "$1:$10000"); + + String json = sheetCodec.toJson(expected); + SheetsSheetTableHandle actual = sheetCodec.fromJson(json); + + assertEquals(actual, expected); + } } diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsPlugin.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsPlugin.java index 053566f99c4e..037e55d0408b 100644 --- a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsPlugin.java +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsPlugin.java @@ -34,6 +34,7 @@ public class TestSheetsPlugin { static final String TEST_METADATA_SHEET_ID = "1Es4HhWALUQjoa-bQh4a8B5HROz7dpGMfq_HbfoaW5LM#Tables"; + static final String DATA_SHEET_ID = "1S625j2oTptRepg6Yci68fCYE1269tdoSjljNOmTgQ3U"; static String getTestCredentialsPath() throws Exception From d38759b16fc5bf1cdfc6114cd373af5f0c0e1fbe Mon Sep 17 00:00:00 2001 From: Grant Nicholas Date: Tue, 24 Jan 2023 15:15:53 -0600 Subject: [PATCH 4/5] Make gsheets.metadata-sheet-id optional with the sheet table function, the metadata sheet is not longer required. --- .../plugin/google/sheets/SheetsClient.java | 12 +++-- .../plugin/google/sheets/SheetsConfig.java | 6 +-- .../google/sheets/SheetsQueryRunner.java | 3 +- .../google/sheets/TestGoogleSheets.java | 7 ++- ...estGoogleSheetsWithoutMetadataSheetId.java | 46 +++++++++++++++++++ .../google/sheets/TestSheetsConfig.java | 4 +- 6 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheetsWithoutMetadataSheetId.java diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java index 553b92ec1e7c..4d9e8f7469ec 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java @@ -75,7 +75,7 @@ public class SheetsClient private final NonEvictableLoadingCache> tableSheetMappingCache; private final NonEvictableLoadingCache>> sheetDataCache; - private final String metadataSheetId; + private final Optional metadataSheetId; private final Sheets sheetsService; @@ -160,9 +160,12 @@ public Optional getTableFromValues(List> values) public Set getTableNames() { + if (metadataSheetId.isEmpty()) { + return ImmutableSet.of(); + } ImmutableSet.Builder tables = ImmutableSet.builder(); try { - List> tableMetadata = sheetDataCache.getUnchecked(metadataSheetId); + List> tableMetadata = sheetDataCache.getUnchecked(metadataSheetId.get()); for (int i = 1; i < tableMetadata.size(); i++) { if (tableMetadata.get(i).size() > 0) { tables.add(String.valueOf(tableMetadata.get(i).get(0))); @@ -218,8 +221,11 @@ private Optional getSheetExpressionForTable(String tableName) private Map> getAllTableSheetExpressionMapping() { + if (metadataSheetId.isEmpty()) { + return ImmutableMap.of(); + } ImmutableMap.Builder> tableSheetMap = ImmutableMap.builder(); - List> data = readAllValuesFromSheetExpression(metadataSheetId); + List> data = readAllValuesFromSheetExpression(metadataSheetId.get()); // first line is assumed to be sheet header for (int i = 1; i < data.size(); i++) { if (data.get(i).size() >= 2) { diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConfig.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConfig.java index 83f48ab1aee7..a35d8311d68e 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConfig.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsConfig.java @@ -32,7 +32,7 @@ public class SheetsConfig { private Optional credentialsFilePath = Optional.empty(); private Optional credentialsKey = Optional.empty(); - private String metadataSheetId; + private Optional metadataSheetId = Optional.empty(); private int sheetsDataMaxCacheSize = 1000; private Duration sheetsDataExpireAfterWrite = new Duration(5, TimeUnit.MINUTES); private Duration readTimeout = new Duration(20, TimeUnit.SECONDS); // 20s is the default timeout of com.google.api.client.http.HttpRequest @@ -74,7 +74,7 @@ public SheetsConfig setCredentialsKey(String credentialsKey) } @NotNull - public String getMetadataSheetId() + public Optional getMetadataSheetId() { return metadataSheetId; } @@ -84,7 +84,7 @@ public String getMetadataSheetId() @ConfigDescription("Metadata sheet id containing table sheet mapping") public SheetsConfig setMetadataSheetId(String metadataSheetId) { - this.metadataSheetId = metadataSheetId; + this.metadataSheetId = Optional.ofNullable(metadataSheetId); return this; } diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/SheetsQueryRunner.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/SheetsQueryRunner.java index 657931385d98..0cb04e8cf647 100644 --- a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/SheetsQueryRunner.java +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/SheetsQueryRunner.java @@ -45,7 +45,6 @@ public static DistributedQueryRunner createSheetsQueryRunner( // note: additional copy via ImmutableList so that if fails on nulls connectorProperties = new HashMap<>(ImmutableMap.copyOf(connectorProperties)); connectorProperties.putIfAbsent("gsheets.credentials-path", getTestCredentialsPath()); - connectorProperties.putIfAbsent("gsheets.metadata-sheet-id", TEST_METADATA_SHEET_ID); connectorProperties.putIfAbsent("gsheets.max-data-cache-size", "1000"); connectorProperties.putIfAbsent("gsheets.data-cache-ttl", "5m"); @@ -75,7 +74,7 @@ public static void main(String[] args) DistributedQueryRunner queryRunner = createSheetsQueryRunner( ImmutableMap.of("http-server.http.port", "8080"), - ImmutableMap.of()); + ImmutableMap.of("gsheets.metadata-sheet-id", TEST_METADATA_SHEET_ID)); Logger log = Logger.get(SheetsQueryRunner.class); log.info("======== SERVER STARTED ========"); diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java index 61786a99b513..a9ae156f5938 100644 --- a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java @@ -20,6 +20,7 @@ import static io.trino.plugin.google.sheets.SheetsQueryRunner.createSheetsQueryRunner; import static io.trino.plugin.google.sheets.TestSheetsPlugin.DATA_SHEET_ID; +import static io.trino.plugin.google.sheets.TestSheetsPlugin.TEST_METADATA_SHEET_ID; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.testng.Assert.assertEquals; @@ -30,7 +31,11 @@ public class TestGoogleSheets protected QueryRunner createQueryRunner() throws Exception { - return createSheetsQueryRunner(ImmutableMap.of(), ImmutableMap.of("gsheets.read-timeout", "1m")); + return createSheetsQueryRunner( + ImmutableMap.of(), + ImmutableMap.of( + "gsheets.read-timeout", "1m", + "gsheets.metadata-sheet-id", TEST_METADATA_SHEET_ID)); } @Test diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheetsWithoutMetadataSheetId.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheetsWithoutMetadataSheetId.java new file mode 100644 index 000000000000..6a6d9db6ef0b --- /dev/null +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheetsWithoutMetadataSheetId.java @@ -0,0 +1,46 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.google.sheets; + +import com.google.common.collect.ImmutableMap; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.QueryRunner; +import org.testng.annotations.Test; + +import static io.trino.plugin.google.sheets.SheetsQueryRunner.createSheetsQueryRunner; +import static io.trino.plugin.google.sheets.TestSheetsPlugin.DATA_SHEET_ID; + +public class TestGoogleSheetsWithoutMetadataSheetId + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return createSheetsQueryRunner(ImmutableMap.of(), ImmutableMap.of("gsheets.read-timeout", "1m")); + } + + @Test + public void testSheetQuerySimple() + { + assertQuery( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s'))".formatted(DATA_SHEET_ID), + "VALUES " + + "('1', 'one')," + + "('2', 'two')," + + "('3', 'three')," + + "('4', 'four')," + + "('5', 'five')"); + } +} diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConfig.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConfig.java index 30711493e6d7..e02be6ee93fd 100644 --- a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConfig.java +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestSheetsConfig.java @@ -72,7 +72,7 @@ public void testExplicitPropertyMappingsCredentialsPath() assertEquals(config.getCredentialsKey(), Optional.empty()); assertEquals(config.getCredentialsFilePath(), Optional.of(credentialsFile.toString())); - assertEquals(config.getMetadataSheetId(), "foo_bar_sheet_id#Sheet1"); + assertEquals(config.getMetadataSheetId(), Optional.of("foo_bar_sheet_id#Sheet1")); assertEquals(config.getSheetsDataMaxCacheSize(), 2000); assertEquals(config.getSheetsDataExpireAfterWrite(), Duration.valueOf("10m")); assertEquals(config.getReadTimeout(), Duration.valueOf("1m")); @@ -94,7 +94,7 @@ public void testExplicitPropertyMappingsCredentialsKey() assertEquals(config.getCredentialsKey(), Optional.of(BASE_64_ENCODED_TEST_KEY)); assertEquals(config.getCredentialsFilePath(), Optional.empty()); - assertEquals(config.getMetadataSheetId(), "foo_bar_sheet_id#Sheet1"); + assertEquals(config.getMetadataSheetId(), Optional.of("foo_bar_sheet_id#Sheet1")); assertEquals(config.getSheetsDataMaxCacheSize(), 2000); assertEquals(config.getSheetsDataExpireAfterWrite(), Duration.valueOf("10m")); assertEquals(config.getReadTimeout(), Duration.valueOf("1m")); From d3ed6c25f1b3add9cdd97d9590780e24ecc3a38d Mon Sep 17 00:00:00 2001 From: Grant Nicholas Date: Mon, 6 Feb 2023 12:22:59 -0600 Subject: [PATCH 5/5] Improve google sheets error message when querying empty sheet range Provide human readable error message instead of: 'InvalidCacheLoadException: CacheLoader returned null for key' --- .../io/trino/plugin/google/sheets/SheetsClient.java | 6 +++++- .../trino/plugin/google/sheets/TestGoogleSheets.java | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java index 4d9e8f7469ec..3ace4b901d43 100644 --- a/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java +++ b/plugin/trino-google-sheets/src/main/java/io/trino/plugin/google/sheets/SheetsClient.java @@ -278,7 +278,11 @@ private List> readAllValuesFromSheetExpression(String sheetExpressi defaultRange = tableOptions[1]; } log.debug("Accessing sheet id [%s] with range [%s]", sheetId, defaultRange); - return sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues(); + List> values = sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues(); + if (values == null) { + throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "No non-empty cells found in sheet: " + sheetExpression); + } + return values; } catch (IOException e) { // TODO: improve error to a {Table|Sheet}NotFoundException diff --git a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java index a9ae156f5938..1980a38d5eee 100644 --- a/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java +++ b/plugin/trino-google-sheets/src/test/java/io/trino/plugin/google/sheets/TestGoogleSheets.java @@ -187,6 +187,18 @@ public void testSheetQueryWithSheetRangeInIdFails() .hasMessageContaining("Failed reading data from sheet: %snumber_text#$1:$10000".formatted(DATA_SHEET_ID)); } + @Test + public void testSheetQueryWithNoDataInRangeFails() + { + assertThatThrownBy(() -> query( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s', range => '%s'))".formatted(DATA_SHEET_ID, "number_text!D1:D1"))) + .hasMessageContaining("No non-empty cells found in sheet: %s#number_text!D1:D1".formatted(DATA_SHEET_ID)); + + assertThatThrownBy(() -> query( + "SELECT * FROM TABLE(gsheets.system.sheet(id => '%s', range => '%s'))".formatted(DATA_SHEET_ID, "number_text!D12:E13"))) + .hasMessageContaining("No non-empty cells found in sheet: %s#number_text!D12:E13".formatted(DATA_SHEET_ID)); + } + @Test public void testSheetQueryWithInvalidSheetId() {