Skip to content

Commit 366daee

Browse files
adkharat authored and agrawalreetika committed
Enable case-sensitive identifier support for BigQuery connector
1 parent acb5422 commit 366daee

File tree

5 files changed

+39
-3
lines changed

5 files changed

+39
-3
lines changed

presto-bigquery/src/main/java/com/facebook/presto/plugin/bigquery/BigQueryClient.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class BigQueryClient
5050
private final Optional<String> viewMaterializationProject;
5151
private final Optional<String> viewMaterializationDataset;
5252
private final String tablePrefix = "_pbc_";
53+
private final boolean caseSensitiveNameMatching;
5354

5455
// unless case-sensitive name matching is enabled, presto converts the dataset and table names to lower case, while BigQuery is case sensitive
5556
private final ConcurrentMap<TableId, TableId> tableIds = new ConcurrentHashMap<>();
@@ -60,6 +61,7 @@ public class BigQueryClient
6061
this.bigQuery = requireNonNull(bigQuery, "bigQuery is null");
6162
this.viewMaterializationProject = requireNonNull(config.getViewMaterializationProject(), "viewMaterializationProject is null");
6263
this.viewMaterializationDataset = requireNonNull(config.getViewMaterializationDataset(), "viewMaterializationDataset is null");
64+
this.caseSensitiveNameMatching = config.isCaseSensitiveNameMatching();
6365
}
6466

6567
public TableInfo getTable(TableId tableId)
@@ -108,7 +110,7 @@ private void addTableMappingIfNeeded(DatasetId datasetID, Table table)
108110
private Dataset addDataSetMappingIfNeeded(Dataset dataset)
109111
{
110112
DatasetId bigQueryDatasetId = dataset.getDatasetId();
111-
DatasetId prestoDatasetId = DatasetId.of(bigQueryDatasetId.getProject(), bigQueryDatasetId.getDataset().toLowerCase(ENGLISH));
113+
DatasetId prestoDatasetId = DatasetId.of(bigQueryDatasetId.getProject(), bigQueryDatasetId.getDataset());
112114
datasetIds.putIfAbsent(prestoDatasetId, bigQueryDatasetId);
113115
return dataset;
114116
}
@@ -123,7 +125,8 @@ protected TableId createDestinationTable(TableId tableId)
123125

124126
private String createTableName()
125127
{
126-
return format(tablePrefix + "%s", randomUUID().toString().toLowerCase(ENGLISH).replace("-", ""));
128+
String uuid = randomUUID().toString().replace("-", "");
129+
return caseSensitiveNameMatching ? format("%s%s", tablePrefix, uuid) : format("%s%s", tablePrefix, uuid).toLowerCase(ENGLISH);
127130
}
128131

129132
private DatasetId mapIfNeeded(String project, String dataset)

presto-bigquery/src/main/java/com/facebook/presto/plugin/bigquery/BigQueryConfig.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ public class BigQueryConfig
3535
private Optional<String> parentProjectId = Optional.empty();
3636
private OptionalInt parallelism = OptionalInt.empty();
3737
private boolean viewsEnabled;
38+
private boolean caseSensitiveNameMatching;
3839
private Optional<String> viewMaterializationProject = Optional.empty();
3940
private Optional<String> viewMaterializationDataset = Optional.empty();
4041
private int maxReadRowsRetries = DEFAULT_MAX_READ_ROWS_RETRIES;
@@ -181,6 +182,22 @@ public BigQueryConfig setMaxReadRowsRetries(int maxReadRowsRetries)
181182
return this;
182183
}
183184

185+
public boolean isCaseSensitiveNameMatching()
186+
{
187+
return caseSensitiveNameMatching;
188+
}
189+
190+
@Config("case-sensitive-name-matching")
191+
@ConfigDescription(
192+
"Case sensitivity for schema and table name matching. " +
193+
"true = preserve case and require exact matches; " +
194+
"false (default) = normalize to lower case and match case-insensitively.")
195+
public BigQueryConfig setCaseSensitiveNameMatching(boolean caseSensitiveNameMatching)
196+
{
197+
this.caseSensitiveNameMatching = caseSensitiveNameMatching;
198+
return this;
199+
}
200+
184201
ReadSessionCreatorConfig createReadSessionCreatorConfig()
185202
{
186203
return new ReadSessionCreatorConfig(

presto-bigquery/src/main/java/com/facebook/presto/plugin/bigquery/BigQueryMetadata.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import static com.google.cloud.bigquery.TableDefinition.Type.TABLE;
5252
import static com.google.cloud.bigquery.TableDefinition.Type.VIEW;
5353
import static com.google.common.collect.ImmutableList.toImmutableList;
54+
import static java.util.Locale.ENGLISH;
5455
import static java.util.Objects.requireNonNull;
5556
import static java.util.stream.Collectors.toMap;
5657

@@ -63,12 +64,14 @@ public class BigQueryMetadata
6364
private static final Logger log = Logger.get(BigQueryMetadata.class);
6465
private final BigQueryClient bigQueryClient;
6566
private final String projectId;
67+
private final boolean caseSensitiveNameMatching;
6668

6769
/**
 * Creates the metadata object for the BigQuery connector.
 *
 * The project id is taken from the config when present, otherwise from the client.
 * The case-sensitive name matching flag is read from the config once, at construction.
 *
 * @param bigQueryClient client used to talk to BigQuery; must not be null
 * @param config connector configuration; must not be null
 * @throws NullPointerException if either argument is null
 */
@Inject
public BigQueryMetadata(BigQueryClient bigQueryClient, BigQueryConfig config)
{
    // Fail fast with a descriptive message instead of an anonymous NPE further down.
    this.bigQueryClient = requireNonNull(bigQueryClient, "bigQueryClient is null");
    requireNonNull(config, "config is null");
    this.projectId = config.getProjectId().orElse(bigQueryClient.getProjectId());
    this.caseSensitiveNameMatching = config.isCaseSensitiveNameMatching();
}
7376

7477
@Override
@@ -233,4 +236,10 @@ private List<SchemaTableName> listTables(ConnectorSession session, SchemaTablePr
233236
ImmutableList.of(tableName) :
234237
ImmutableList.of(); // table does not exist
235238
}
239+
240+
@Override
241+
public String normalizeIdentifier(ConnectorSession session, String identifier)
242+
{
243+
return caseSensitiveNameMatching ? identifier : identifier.toLowerCase(ENGLISH);
244+
}
236245
}

presto-bigquery/src/test/java/com/facebook/presto/plugin/bigquery/TestBigQueryConfig.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ public void testDefaults()
3636
.setParallelism(20)
3737
.setViewMaterializationProject("vmproject")
3838
.setViewMaterializationDataset("vmdataset")
39-
.setMaxReadRowsRetries(10);
39+
.setMaxReadRowsRetries(10)
40+
.setCaseSensitiveNameMatching(false);
4041

4142
assertEquals(config.getCredentialsKey(), Optional.of("ckey"));
4243
assertEquals(config.getCredentialsFile(), Optional.of("cfile"));
@@ -46,6 +47,7 @@ public void testDefaults()
4647
assertEquals(config.getViewMaterializationProject(), Optional.of("vmproject"));
4748
assertEquals(config.getViewMaterializationDataset(), Optional.of("vmdataset"));
4849
assertEquals(config.getMaxReadRowsRetries(), 10);
50+
assertEquals(config.isCaseSensitiveNameMatching(), false);
4951
}
5052

5153
@Test
@@ -59,6 +61,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey()
5961
.put("bigquery.view-materialization-project", "vmproject")
6062
.put("bigquery.view-materialization-dataset", "vmdataset")
6163
.put("bigquery.max-read-rows-retries", "10")
64+
.put("case-sensitive-name-matching", "true")
6265
.build();
6366

6467
ConfigurationFactory configurationFactory = new ConfigurationFactory(properties);
@@ -71,6 +74,7 @@ public void testExplicitPropertyMappingsWithCredentialsKey()
7174
assertEquals(config.getViewMaterializationProject(), Optional.of("vmproject"));
7275
assertEquals(config.getViewMaterializationDataset(), Optional.of("vmdataset"));
7376
assertEquals(config.getMaxReadRowsRetries(), 10);
77+
assertEquals(config.isCaseSensitiveNameMatching(), true);
7478
}
7579

7680
@Test

presto-docs/src/main/sphinx/connector/bigquery.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ Property Description
137137
``bigquery.max-read-rows-retries`` The number of retries in case of retryable server issues ``3``
138138
``bigquery.credentials-key`` credentials key (base64 encoded) None. See `authentication <#authentication>`_
139139
``bigquery.credentials-file`` JSON credentials file path None. See `authentication <#authentication>`_
140+
``case-sensitive-name-matching`` Enable case-sensitive identifier support for schema and table ``false``
141+
names for the connector. When disabled, names are matched
142+
case-insensitively using lowercase normalization.
140143
========================================= ============================================================== ==============================================
141144

142145
Data Types

0 commit comments

Comments
 (0)