-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Support sheet table function for google sheets connector #15829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6680247
6c79554
3304931
d38759b
d3ed6c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -68,8 +68,8 @@ containing the following columns in this order: | |
|
|
||
| * Table Name | ||
| * Sheet ID | ||
| * Owner | ||
| * Notes | ||
| * Owner (optional) | ||
| * Notes (optional) | ||
|
|
||
| See this `example sheet <https://docs.google.com/spreadsheets/d/1Es4HhWALUQjoa-bQh4a8B5HROz7dpGMfq_HbfoaW5LM>`_ | ||
| as a reference. | ||
|
|
@@ -89,10 +89,14 @@ address of the service account. | |
|
|
||
| The sheet needs to be mapped to a Trino table name. Specify a table name | ||
| (column A) and the sheet ID (column B) in the metadata sheet. To refer | ||
| to a specific tab in the sheet, add the tab name after the sheet ID, separated | ||
| with ``#``. If tab name is not provided, connector loads only 10,000 rows by default from | ||
| to a specific range in the sheet, add the range after the sheet ID, separated | ||
| with ``#``. If a range is not provided, the connector loads only 10,000 rows by default from | ||
| the first tab in the sheet. | ||
|
|
||
| The first row of the provided sheet range is used as the header and determines the column | ||
| names of the Trino table. | ||
| For more details on sheet range syntax, see the `Google Sheets docs <https://developers.google.com/sheets/api/guides/concepts>`_. | ||
|
|
||
| API usage limits | ||
| ---------------- | ||
|
|
||
|
|
@@ -133,3 +137,33 @@ SQL support | |
| The connector provides :ref:`globally available <sql-globally-available>` and | ||
| :ref:`read operation <sql-read-operations>` statements to access data and | ||
| metadata in Google Sheets. | ||
|
|
||
| Table functions | ||
| --------------- | ||
|
|
||
| The connector provides specific :doc:`table functions </functions/table>` to | ||
| access Google Sheets. | ||
|
|
||
| .. _google-sheets-sheet-function: | ||
|
|
||
| ``sheet(id, range) -> table`` | ||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
|
||
| The ``sheet`` function allows you to query a Google Sheet directly without | ||
| specifying it as a named table in the metadata sheet. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to mention that this is the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed. |
||
|
|
||
| For example, for a catalog named ``example``:: | ||
|
|
||
| SELECT * | ||
| FROM | ||
| TABLE(example.system.sheet( | ||
| id => 'googleSheetIdHere')); | ||
|
|
||
| A sheet range or named range can be provided as an optional ``range`` argument. | ||
| The default sheet range is ``$1:$10000`` if one is not provided:: | ||
|
|
||
| SELECT * | ||
| FROM | ||
| TABLE(example.system.sheet( | ||
| id => 'googleSheetIdHere', | ||
| range => 'TabName!A1:B4')); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| /* | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package io.trino.plugin.google.sheets; | ||
|
|
||
| import io.trino.spi.connector.NotFoundException; | ||
|
|
||
| import static java.lang.String.format; | ||
|
|
||
| public class SheetNotFoundException | ||
| extends NotFoundException | ||
| { | ||
| public SheetNotFoundException(String sheetExpression) | ||
| { | ||
| super(format("Sheet '%s' not found", sheetExpression)); | ||
|
findepi marked this conversation as resolved.
Outdated
|
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,6 +63,8 @@ | |
|
|
||
| public class SheetsClient | ||
| { | ||
| public static final String DEFAULT_RANGE = "$1:$10000"; | ||
| public static final String RANGE_SEPARATOR = "#"; | ||
| private static final Logger log = Logger.get(SheetsClient.class); | ||
|
|
||
| private static final String APPLICATION_NAME = "trino google sheets integration"; | ||
|
|
@@ -73,7 +75,7 @@ public class SheetsClient | |
| private final NonEvictableLoadingCache<String, Optional<String>> tableSheetMappingCache; | ||
| private final NonEvictableLoadingCache<String, List<List<Object>>> sheetDataCache; | ||
|
|
||
| private final String metadataSheetId; | ||
| private final Optional<String> metadataSheetId; | ||
|
|
||
| private final Sheets sheetsService; | ||
|
|
||
|
|
@@ -115,14 +117,31 @@ public Map<String, Optional<String>> loadAll(Iterable<? extends String> tableLis | |
| CacheLoader.from(this::readAllValuesFromSheetExpression)); | ||
| } | ||
|
|
||
| public Optional<SheetsTable> getTable(SheetsConnectorTableHandle tableHandle) | ||
| { | ||
| if (tableHandle instanceof SheetsNamedTableHandle namedTableHandle) { | ||
| return getTable(namedTableHandle.getTableName()); | ||
| } | ||
| if (tableHandle instanceof SheetsSheetTableHandle sheetTableHandle) { | ||
| return getTableFromValues(readAllValuesFromSheet(sheetTableHandle.getSheetExpression())); | ||
| } | ||
| throw new IllegalStateException("Found unexpected table handle type " + tableHandle); | ||
| } | ||
|
|
||
| public Optional<SheetsTable> getTable(String tableName) | ||
| { | ||
| List<List<String>> values = convertToStringValues(readAllValues(tableName)); | ||
| if (values.size() > 0) { | ||
| List<List<Object>> values = readAllValues(tableName); | ||
| return getTableFromValues(values); | ||
| } | ||
|
|
||
| public Optional<SheetsTable> getTableFromValues(List<List<Object>> values) | ||
| { | ||
| List<List<String>> stringValues = convertToStringValues(values); | ||
| if (stringValues.size() > 0) { | ||
| ImmutableList.Builder<SheetsColumn> columns = ImmutableList.builder(); | ||
| Set<String> columnNames = new HashSet<>(); | ||
| // Assuming 1st line is always header | ||
| List<String> header = values.get(0); | ||
| List<String> header = stringValues.get(0); | ||
| int count = 0; | ||
| for (String column : header) { | ||
| String columnValue = column.toLowerCase(ENGLISH); | ||
|
|
@@ -133,17 +152,20 @@ public Optional<SheetsTable> getTable(String tableName) | |
| columnNames.add(columnValue); | ||
| columns.add(new SheetsColumn(columnValue, VarcharType.VARCHAR)); | ||
| } | ||
| List<List<String>> dataValues = values.subList(1, values.size()); // removing header info | ||
| return Optional.of(new SheetsTable(tableName, columns.build(), dataValues)); | ||
| List<List<String>> dataValues = stringValues.subList(1, values.size()); // removing header info | ||
| return Optional.of(new SheetsTable(columns.build(), dataValues)); | ||
| } | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| public Set<String> getTableNames() | ||
| { | ||
| if (metadataSheetId.isEmpty()) { | ||
| return ImmutableSet.of(); | ||
| } | ||
| ImmutableSet.Builder<String> tables = ImmutableSet.builder(); | ||
| try { | ||
| List<List<Object>> tableMetadata = sheetDataCache.getUnchecked(metadataSheetId); | ||
| List<List<Object>> tableMetadata = sheetDataCache.getUnchecked(metadataSheetId.get()); | ||
| for (int i = 1; i < tableMetadata.size(); i++) { | ||
| if (tableMetadata.get(i).size() > 0) { | ||
| tables.add(String.valueOf(tableMetadata.get(i).get(0))); | ||
|
|
@@ -162,14 +184,25 @@ public List<List<Object>> readAllValues(String tableName) | |
| try { | ||
| String sheetExpression = tableSheetMappingCache.getUnchecked(tableName) | ||
| .orElseThrow(() -> new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Sheet expression not found for table " + tableName)); | ||
| return sheetDataCache.getUnchecked(sheetExpression); | ||
| return readAllValuesFromSheet(sheetExpression); | ||
| } | ||
| catch (UncheckedExecutionException e) { | ||
| throwIfInstanceOf(e.getCause(), TrinoException.class); | ||
| throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "Error loading data for table: " + tableName, e); | ||
|
findepi marked this conversation as resolved.
Outdated
|
||
| } | ||
| } | ||
|
|
||
| public List<List<Object>> readAllValuesFromSheet(String sheetExpression) | ||
|
grantatspothero marked this conversation as resolved.
Outdated
|
||
| { | ||
| try { | ||
| return sheetDataCache.getUnchecked(sheetExpression); | ||
|
ebyhr marked this conversation as resolved.
Outdated
|
||
| } | ||
| catch (UncheckedExecutionException e) { | ||
| throwIfInstanceOf(e.getCause(), TrinoException.class); | ||
| throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "Error loading data for sheet: " + sheetExpression, e); | ||
| } | ||
| } | ||
|
|
||
| public static List<List<String>> convertToStringValues(List<List<Object>> values) | ||
| { | ||
| return values.stream() | ||
|
|
@@ -188,8 +221,11 @@ private Optional<String> getSheetExpressionForTable(String tableName) | |
|
|
||
| private Map<String, Optional<String>> getAllTableSheetExpressionMapping() | ||
| { | ||
| if (metadataSheetId.isEmpty()) { | ||
| return ImmutableMap.of(); | ||
| } | ||
| ImmutableMap.Builder<String, Optional<String>> tableSheetMap = ImmutableMap.builder(); | ||
| List<List<Object>> data = readAllValuesFromSheetExpression(metadataSheetId); | ||
| List<List<Object>> data = readAllValuesFromSheetExpression(metadataSheetId.get()); | ||
| // first line is assumed to be sheet header | ||
| for (int i = 1; i < data.size(); i++) { | ||
| if (data.get(i).size() >= 2) { | ||
|
|
@@ -235,16 +271,22 @@ private List<List<Object>> readAllValuesFromSheetExpression(String sheetExpressi | |
| { | ||
| try { | ||
| // by default loading up to 10k rows from the first tab of the sheet | ||
| String defaultRange = "$1:$10000"; | ||
| String[] tableOptions = sheetExpression.split("#"); | ||
| String defaultRange = DEFAULT_RANGE; | ||
| String[] tableOptions = sheetExpression.split(RANGE_SEPARATOR); | ||
| String sheetId = tableOptions[0]; | ||
| if (tableOptions.length > 1) { | ||
| defaultRange = tableOptions[1]; | ||
| } | ||
| log.debug("Accessing sheet id [%s] with range [%s]", sheetId, defaultRange); | ||
| return sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues(); | ||
| List<List<Object>> values = sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues(); | ||
| if (values == null) { | ||
| throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "No non-empty cells found in sheet: " + sheetExpression); | ||
| } | ||
| return values; | ||
| } | ||
| catch (IOException e) { | ||
| // TODO: improve error to a {Table|Sheet}NotFoundException | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand this TODO
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment from @ebyhr here: #15829 (comment) I did not want to change the error code returned from Updated the comment to make the reasoning more clear.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, now I can read the comment at least :) However, I don't understand why we would want to throw "not found" within the IOException catch block. If we want to detect that the IOException conveys "specific sheet doesn't exist", we could do that (but we wouldn't say TableNotFound even then, because the requested sheet may not be a table). If it was up to me to decide, and if no new information is provided to me, I would just delete this TODO comment and worry about that later, and only if we learn later there is something to worry about. |
||
| // is a backwards incompatible error code change from SHEETS_UNKNOWN_TABLE_ERROR -> NOT_FOUND | ||
| throw new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Failed reading data from sheet: " + sheetExpression, e); | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| /* | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package io.trino.plugin.google.sheets; | ||
|
|
||
| import io.trino.spi.connector.ConnectorTableHandle; | ||
| import io.trino.spi.connector.NotFoundException; | ||
| import io.trino.spi.connector.SchemaTableName; | ||
| import io.trino.spi.connector.TableNotFoundException; | ||
|
|
||
| public sealed interface SheetsConnectorTableHandle | ||
| extends ConnectorTableHandle | ||
| permits SheetsNamedTableHandle, SheetsSheetTableHandle | ||
| { | ||
| static NotFoundException tableNotFound(SheetsConnectorTableHandle tableHandle) | ||
| { | ||
| if (tableHandle instanceof SheetsNamedTableHandle sheetsNamedTableHandle) { | ||
| return new TableNotFoundException(new SchemaTableName(sheetsNamedTableHandle.getSchemaName(), sheetsNamedTableHandle.getTableName())); | ||
| } | ||
| if (tableHandle instanceof SheetsSheetTableHandle sheetsSheetTableHandle) { | ||
| return new SheetNotFoundException(sheetsSheetTableHandle.getSheetExpression()); | ||
| } | ||
| throw new IllegalStateException("Found unexpected table handle type " + tableHandle); | ||
| } | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.