Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions docs/src/main/sphinx/connector/googlesheets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ containing the following columns in this order:

* Table Name
* Sheet ID
* Owner
* Notes
* Owner (optional)
* Notes (optional)

See this `example sheet <https://docs.google.com/spreadsheets/d/1Es4HhWALUQjoa-bQh4a8B5HROz7dpGMfq_HbfoaW5LM>`_
as a reference.
Expand All @@ -89,10 +89,14 @@ address of the service account.

The sheet needs to be mapped to a Trino table name. Specify a table name
(column A) and the sheet ID (column B) in the metadata sheet. To refer
to a specific tab in the sheet, add the tab name after the sheet ID, separated
with ``#``. If tab name is not provided, connector loads only 10,000 rows by default from
to a specific range in the sheet, add the range after the sheet ID, separated
with ``#``. If a range is not provided, the connector loads only 10,000 rows by default from
the first tab in the sheet.

The first row of the provided sheet range is used as the header and determines the column
names of the Trino table.
For more details on sheet range syntax, see the `Google Sheets API documentation <https://developers.google.com/sheets/api/guides/concepts>`_.

API usage limits
----------------

Expand Down Expand Up @@ -133,3 +137,33 @@ SQL support
The connector provides :ref:`globally available <sql-globally-available>` and
:ref:`read operation <sql-read-operations>` statements to access data and
metadata in Google Sheets.
Comment thread
grantatspothero marked this conversation as resolved.
Outdated

Table functions
---------------

The connector provides specific :doc:`table functions </functions/table>` to
access Google Sheets.

.. _google-sheets-sheet-function:

``sheet(id, range) -> table``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``sheet`` function allows you to query a Google Sheet directly without
specifying it as a named table in the metadata sheet.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to mention that this is the example catalog..

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed.


For example, for a catalog named 'example'::

SELECT *
FROM
TABLE(example.system.sheet(
id => 'googleSheetIdHere'));

A sheet range or named range can be provided as an optional ``range`` argument.
The default sheet range is ``$1:$10000`` if one is not provided::

SELECT *
FROM
TABLE(example.system.sheet(
id => 'googleSheetIdHere',
range => 'TabName!A1:B4'));
6 changes: 6 additions & 0 deletions plugin/trino-google-sheets/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.eclipse.jetty.toolchain</groupId>
<artifactId>jetty-servlet-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.google.sheets;

import io.trino.spi.connector.NotFoundException;

import static java.lang.String.format;

public class SheetNotFoundException
extends NotFoundException
{
public SheetNotFoundException(String sheetExpression)
{
super(format("Sheet '%s' not found", sheetExpression));
Comment thread
findepi marked this conversation as resolved.
Outdated
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@

public class SheetsClient
{
public static final String DEFAULT_RANGE = "$1:$10000";
public static final String RANGE_SEPARATOR = "#";
private static final Logger log = Logger.get(SheetsClient.class);

private static final String APPLICATION_NAME = "trino google sheets integration";
Expand All @@ -73,7 +75,7 @@ public class SheetsClient
private final NonEvictableLoadingCache<String, Optional<String>> tableSheetMappingCache;
private final NonEvictableLoadingCache<String, List<List<Object>>> sheetDataCache;

private final String metadataSheetId;
private final Optional<String> metadataSheetId;

private final Sheets sheetsService;

Expand Down Expand Up @@ -115,14 +117,31 @@ public Map<String, Optional<String>> loadAll(Iterable<? extends String> tableLis
CacheLoader.from(this::readAllValuesFromSheetExpression));
}

/**
 * Resolves a table handle to its table definition.
 * <p>
 * A named table handle is resolved by its table name via {@link #getTable(String)}.
 * A sheet table handle is resolved by reading the values behind its sheet expression
 * and building the table from them.
 *
 * @param tableHandle the handle to resolve
 * @return the table, or an empty {@code Optional} when the underlying lookup yields none
 * @throws IllegalStateException if the handle is of neither known type
 */
public Optional<SheetsTable> getTable(SheetsConnectorTableHandle tableHandle)
{
    if (tableHandle instanceof SheetsNamedTableHandle named) {
        String tableName = named.getTableName();
        return getTable(tableName);
    }
    if (tableHandle instanceof SheetsSheetTableHandle direct) {
        String sheetExpression = direct.getSheetExpression();
        List<List<Object>> rawValues = readAllValuesFromSheet(sheetExpression);
        return getTableFromValues(rawValues);
    }
    throw new IllegalStateException("Found unexpected table handle type " + tableHandle);
}

public Optional<SheetsTable> getTable(String tableName)
{
List<List<String>> values = convertToStringValues(readAllValues(tableName));
if (values.size() > 0) {
List<List<Object>> values = readAllValues(tableName);
return getTableFromValues(values);
}

public Optional<SheetsTable> getTableFromValues(List<List<Object>> values)
{
List<List<String>> stringValues = convertToStringValues(values);
if (stringValues.size() > 0) {
ImmutableList.Builder<SheetsColumn> columns = ImmutableList.builder();
Set<String> columnNames = new HashSet<>();
// Assuming 1st line is always header
List<String> header = values.get(0);
List<String> header = stringValues.get(0);
int count = 0;
for (String column : header) {
String columnValue = column.toLowerCase(ENGLISH);
Expand All @@ -133,17 +152,20 @@ public Optional<SheetsTable> getTable(String tableName)
columnNames.add(columnValue);
columns.add(new SheetsColumn(columnValue, VarcharType.VARCHAR));
}
List<List<String>> dataValues = values.subList(1, values.size()); // removing header info
return Optional.of(new SheetsTable(tableName, columns.build(), dataValues));
List<List<String>> dataValues = stringValues.subList(1, values.size()); // removing header info
return Optional.of(new SheetsTable(columns.build(), dataValues));
}
return Optional.empty();
}

public Set<String> getTableNames()
{
if (metadataSheetId.isEmpty()) {
return ImmutableSet.of();
}
ImmutableSet.Builder<String> tables = ImmutableSet.builder();
try {
List<List<Object>> tableMetadata = sheetDataCache.getUnchecked(metadataSheetId);
List<List<Object>> tableMetadata = sheetDataCache.getUnchecked(metadataSheetId.get());
for (int i = 1; i < tableMetadata.size(); i++) {
if (tableMetadata.get(i).size() > 0) {
tables.add(String.valueOf(tableMetadata.get(i).get(0)));
Expand All @@ -162,14 +184,25 @@ public List<List<Object>> readAllValues(String tableName)
try {
String sheetExpression = tableSheetMappingCache.getUnchecked(tableName)
.orElseThrow(() -> new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Sheet expression not found for table " + tableName));
return sheetDataCache.getUnchecked(sheetExpression);
return readAllValuesFromSheet(sheetExpression);
}
catch (UncheckedExecutionException e) {
throwIfInstanceOf(e.getCause(), TrinoException.class);
throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "Error loading data for table: " + tableName, e);
Comment thread
findepi marked this conversation as resolved.
Outdated
}
}

public List<List<Object>> readAllValuesFromSheet(String sheetExpression)
Comment thread
grantatspothero marked this conversation as resolved.
Outdated
{
try {
return sheetDataCache.getUnchecked(sheetExpression);
Comment thread
ebyhr marked this conversation as resolved.
Outdated
}
catch (UncheckedExecutionException e) {
throwIfInstanceOf(e.getCause(), TrinoException.class);
throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "Error loading data for sheet: " + sheetExpression, e);
}
}

public static List<List<String>> convertToStringValues(List<List<Object>> values)
{
return values.stream()
Expand All @@ -188,8 +221,11 @@ private Optional<String> getSheetExpressionForTable(String tableName)

private Map<String, Optional<String>> getAllTableSheetExpressionMapping()
{
if (metadataSheetId.isEmpty()) {
return ImmutableMap.of();
}
ImmutableMap.Builder<String, Optional<String>> tableSheetMap = ImmutableMap.builder();
List<List<Object>> data = readAllValuesFromSheetExpression(metadataSheetId);
List<List<Object>> data = readAllValuesFromSheetExpression(metadataSheetId.get());
// first line is assumed to be sheet header
for (int i = 1; i < data.size(); i++) {
if (data.get(i).size() >= 2) {
Expand Down Expand Up @@ -235,16 +271,22 @@ private List<List<Object>> readAllValuesFromSheetExpression(String sheetExpressi
{
try {
// by default loading up to 10k rows from the first tab of the sheet
String defaultRange = "$1:$10000";
String[] tableOptions = sheetExpression.split("#");
String defaultRange = DEFAULT_RANGE;
String[] tableOptions = sheetExpression.split(RANGE_SEPARATOR);
String sheetId = tableOptions[0];
if (tableOptions.length > 1) {
defaultRange = tableOptions[1];
}
log.debug("Accessing sheet id [%s] with range [%s]", sheetId, defaultRange);
return sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues();
List<List<Object>> values = sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues();
if (values == null) {
throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "No non-empty cells found in sheet: " + sheetExpression);
}
return values;
}
catch (IOException e) {
// TODO: improve error to a {Table|Sheet}NotFoundException
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this TODO

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment from @ebyhr here: #15829 (comment)

I did not want to change the error code returned from SHEETS_UNKNOWN_TABLE_ERROR to NOT_FOUND since this would be backwards incompatible.

Updated the comment to make the reasoning more clear.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, now i can read the comment at least :)

however, i don't understand why we would want to throw "not found" within IOException catch block
to me, the current SHEETS_UNKNOWN_TABLE_ERROR looks fine (the IOException is just some error)

if we want to detect that IOException conveys "specific sheet doesn't exist", we could do that (but we wouldnt' say TableNotFound even then, because the requested sheet may not be a table)

if it was up to me to decide, and if no new information is provided to me, i would just delete this TODO comment and worry about that later, and only if we learn later there is something to worry about

// is a backwards incompatible error code change from SHEETS_UNKNOWN_TABLE_ERROR -> NOT_FOUND
throw new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Failed reading data from sheet: " + sheetExpression, e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class SheetsConfig
{
private Optional<String> credentialsFilePath = Optional.empty();
private Optional<String> credentialsKey = Optional.empty();
private String metadataSheetId;
private Optional<String> metadataSheetId = Optional.empty();
private int sheetsDataMaxCacheSize = 1000;
private Duration sheetsDataExpireAfterWrite = new Duration(5, TimeUnit.MINUTES);
private Duration readTimeout = new Duration(20, TimeUnit.SECONDS); // 20s is the default timeout of com.google.api.client.http.HttpRequest
Expand Down Expand Up @@ -74,7 +74,7 @@ public SheetsConfig setCredentialsKey(String credentialsKey)
}

@NotNull
public String getMetadataSheetId()
public Optional<String> getMetadataSheetId()
{
return metadataSheetId;
}
Expand All @@ -84,7 +84,7 @@ public String getMetadataSheetId()
@ConfigDescription("Metadata sheet id containing table sheet mapping")
public SheetsConfig setMetadataSheetId(String metadataSheetId)
{
this.metadataSheetId = metadataSheetId;
this.metadataSheetId = Optional.ofNullable(metadataSheetId);
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,21 @@
*/
package io.trino.plugin.google.sheets;

import com.google.common.collect.ImmutableSet;
import io.airlift.bootstrap.LifeCycleManager;
import io.trino.spi.connector.Connector;
import io.trino.spi.connector.ConnectorMetadata;
import io.trino.spi.connector.ConnectorRecordSetProvider;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplitManager;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.ptf.ConnectorTableFunction;
import io.trino.spi.transaction.IsolationLevel;

import javax.inject.Inject;

import java.util.Set;

import static io.trino.plugin.google.sheets.SheetsTransactionHandle.INSTANCE;
import static java.util.Objects.requireNonNull;

Expand All @@ -34,18 +38,21 @@ public class SheetsConnector
private final SheetsMetadata metadata;
private final SheetsSplitManager splitManager;
private final SheetsRecordSetProvider recordSetProvider;
private final Set<ConnectorTableFunction> connectorTableFunctions;

@Inject
public SheetsConnector(
LifeCycleManager lifeCycleManager,
SheetsMetadata metadata,
SheetsSplitManager splitManager,
SheetsRecordSetProvider recordSetProvider)
SheetsRecordSetProvider recordSetProvider,
Set<ConnectorTableFunction> connectorTableFunctions)
{
this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null");
this.metadata = requireNonNull(metadata, "metadata is null");
this.splitManager = requireNonNull(splitManager, "splitManager is null");
this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null");
this.connectorTableFunctions = ImmutableSet.copyOf(requireNonNull(connectorTableFunctions, "connectorTableFunctions is null"));
}

@Override
Expand All @@ -72,6 +79,12 @@ public ConnectorRecordSetProvider getRecordSetProvider()
return recordSetProvider;
}

// Exposes the table functions that were handed to this connector's constructor.
@Override
public Set<ConnectorTableFunction> getTableFunctions()
{
return connectorTableFunctions;
}

@Override
public final void shutdown()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.google.sheets;

import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.NotFoundException;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.TableNotFoundException;

/**
 * Table handle type for the Google Sheets connector. Only
 * {@link SheetsNamedTableHandle} and {@link SheetsSheetTableHandle} are permitted
 * implementations.
 */
public sealed interface SheetsConnectorTableHandle
        extends ConnectorTableHandle
        permits SheetsNamedTableHandle, SheetsSheetTableHandle
{
    /**
     * Builds (but does not throw) the "not found" exception matching the kind of handle.
     *
     * @param tableHandle the handle whose target could not be found
     * @return a {@link TableNotFoundException} for a named table handle, or a
     *         {@link SheetNotFoundException} for a sheet table handle
     * @throws IllegalStateException if the handle is of neither known type
     */
    static NotFoundException tableNotFound(SheetsConnectorTableHandle tableHandle)
    {
        if (tableHandle instanceof SheetsNamedTableHandle named) {
            SchemaTableName missingTable = new SchemaTableName(named.getSchemaName(), named.getTableName());
            return new TableNotFoundException(missingTable);
        }
        if (tableHandle instanceof SheetsSheetTableHandle direct) {
            String missingSheet = direct.getSheetExpression();
            return new SheetNotFoundException(missingSheet);
        }
        throw new IllegalStateException("Found unexpected table handle type " + tableHandle);
    }
}
Loading