diff --git a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java index 5847e2d3f6de..1e900cfef744 100644 --- a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java +++ b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClient.java @@ -92,7 +92,6 @@ public class BigQueryClient { private static final Logger log = Logger.get(BigQueryClient.class); - private static final int PAGE_SIZE = 100; // BigQuery has different table_type in `INFORMATION_SCHEMA` than API responses that returns TableDefinition.Type // see https://cloud.google.com/bigquery/docs/information-schema-tables#schema @@ -110,6 +109,7 @@ public class BigQueryClient private final ViewMaterializationCache materializationCache; private final boolean caseInsensitiveNameMatching; private final LoadingCache> remoteDatasetIdCache; + private final int metadataPageSize; private final Cache remoteDatasetCaseInsensitiveCache; private final Cache remoteTableCaseInsensitiveCache; private final Optional configProjectId; @@ -122,6 +122,7 @@ public BigQueryClient( Duration caseInsensitiveNameMatchingCacheTtl, ViewMaterializationCache materializationCache, Duration metadataCacheTtl, + int metadataPageSize, Optional configProjectId) { this.bigQuery = requireNonNull(bigQuery, "bigQuery is null"); @@ -133,6 +134,7 @@ public BigQueryClient( .expireAfterWrite(metadataCacheTtl.toMillis(), MILLISECONDS) .shareNothingWhenDisabled() .build(CacheLoader.from(this::listDatasetIdsFromBigQuery)); + this.metadataPageSize = metadataPageSize; this.remoteDatasetCaseInsensitiveCache = buildCache(caseInsensitiveNameMatchingCacheTtl); this.remoteTableCaseInsensitiveCache = buildCache(caseInsensitiveNameMatchingCacheTtl); this.configProjectId = requireNonNull(configProjectId, "projectId is null"); @@ -323,7 +325,7 @@ public List listDatasetIds(String projectId) private List listDatasetIdsFromBigQuery(String projectId) { // BigQuery.listDatasets returns partial information on each dataset. See javadoc for more details. - return stream(bigQuery.listDatasets(projectId, BigQuery.DatasetListOption.pageSize(PAGE_SIZE)).iterateAll()) + return stream(bigQuery.listDatasets(projectId, BigQuery.DatasetListOption.pageSize(metadataPageSize)).iterateAll()) .map(Dataset::getDatasetId) .collect(toImmutableList()); } @@ -333,7 +335,7 @@ public Iterable listTableIds(DatasetId remoteDatasetId) // BigQuery.listTables returns partial information on each table. See javadoc for more details. Iterable allTables; try { - allTables = bigQuery.listTables(remoteDatasetId, BigQuery.TableListOption.pageSize(PAGE_SIZE)).iterateAll(); + allTables = bigQuery.listTables(remoteDatasetId, BigQuery.TableListOption.pageSize(metadataPageSize)).iterateAll(); } catch (BigQueryException e) { throw new TrinoException(BIGQUERY_LISTING_TABLE_ERROR, "Failed to retrieve tables from BigQuery", e); diff --git a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java index 33902e145b69..6f236037c3b2 100644 --- a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java +++ b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryClientFactory.java @@ -41,6 +41,7 @@ public class BigQueryClientFactory private final NonEvictableCache clientCache; private final Duration metadataCacheTtl; + private final int metadataPageSize; private final Set optionsConfigurers; @Inject @@ -61,6 +62,7 @@ public BigQueryClientFactory( this.materializationCache = requireNonNull(materializationCache, "materializationCache is null"); this.labelFactory = requireNonNull(labelFactory, "labelFactory is null"); this.metadataCacheTtl = bigQueryConfig.getMetadataCacheTtl(); + this.metadataPageSize = bigQueryConfig.getMetadataPageSize(); this.optionsConfigurers = requireNonNull(optionsConfigurers, "optionsConfigurers is null"); CacheBuilder cacheBuilder = CacheBuilder.newBuilder() @@ -85,6 +87,7 @@ protected BigQueryClient createBigQueryClient(ConnectorSession session) caseInsensitiveNameMatchingCacheTtl, materializationCache, metadataCacheTtl, + metadataPageSize, projectId); } diff --git a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java index 2e7d34c3ad2e..bdca06a7c82a 100644 --- a/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java +++ b/plugin/trino-bigquery/src/main/java/io/trino/plugin/bigquery/BigQueryConfig.java @@ -52,6 +52,7 @@ public class BigQueryConfig private Optional viewMaterializationProject = Optional.empty(); private Optional viewMaterializationDataset = Optional.empty(); private int maxReadRowsRetries = DEFAULT_MAX_READ_ROWS_RETRIES; + private int metadataPageSize = 1000; private boolean caseInsensitiveNameMatching; private Duration caseInsensitiveNameMatchingCacheTtl = new Duration(0, MILLISECONDS); private Duration viewsCacheTtl = new Duration(15, MINUTES); @@ -64,7 +65,7 @@ public class BigQueryConfig private String queryLabelFormat; private boolean proxyEnabled; private boolean projectionPushDownEnabled = true; - private int metadataParallelism = 2; + private int metadataParallelism = Runtime.getRuntime().availableProcessors(); public Optional getProjectId() { @@ -198,6 +199,20 @@ public BigQueryConfig setMaxReadRowsRetries(int maxReadRowsRetries) return this; } + @Min(1) + public int getMetadataPageSize() + { + return metadataPageSize; + } + + @Config("bigquery.metadata-page-size") + @ConfigDescription("The number of metadata entries retrieved per API request") + public BigQueryConfig setMetadataPageSize(int metadataPageSize) + { + this.metadataPageSize = metadataPageSize; + return this; + } + public boolean isCaseInsensitiveNameMatching() { return caseInsensitiveNameMatching; diff --git a/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java index 8ba85db57dea..ea45a4b80416 100644 --- a/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java +++ b/plugin/trino-bigquery/src/test/java/io/trino/plugin/bigquery/TestBigQueryConfig.java @@ -42,6 +42,7 @@ public void testDefaults() .setViewMaterializationProject(null) .setViewMaterializationDataset(null) .setMaxReadRowsRetries(3) + .setMetadataPageSize(1000) .setCaseInsensitiveNameMatching(false) .setCaseInsensitiveNameMatchingCacheTtl(new Duration(0, MILLISECONDS)) .setViewsCacheTtl(new Duration(15, MINUTES)) @@ -55,7 +56,7 @@ public void testDefaults() .setQueryLabelFormat(null) .setProxyEnabled(false) .setProjectionPushdownEnabled(true) - .setMetadataParallelism(2)); + .setMetadataParallelism(Runtime.getRuntime().availableProcessors())); } @Test @@ -72,6 +73,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey() .put("bigquery.view-materialization-project", "vmproject") .put("bigquery.view-materialization-dataset", "vmdataset") .put("bigquery.max-read-rows-retries", "10") + .put("bigquery.metadata-page-size", "100") .put("bigquery.case-insensitive-name-matching", "true") .put("bigquery.case-insensitive-name-matching.cache-ttl", "1h") .put("bigquery.views-cache-ttl", "1m") @@ -97,6 +99,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey() .setViewMaterializationProject("vmproject") .setViewMaterializationDataset("vmdataset") .setMaxReadRowsRetries(10) + .setMetadataPageSize(100) .setCaseInsensitiveNameMatching(true) .setCaseInsensitiveNameMatchingCacheTtl(new Duration(1, HOURS)) .setViewsCacheTtl(new Duration(1, MINUTES))