Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add columnNameCharacterMap to LoadJobConfiguration #3356

Merged
merged 3 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load

private final List<String> sourceUris;
private final String fileSetSpecType;
private final String columnNameCharacterMap;
private final TableId destinationTable;
private final List<String> decimalTargetTypes;
private final EncryptionConfiguration destinationEncryptionConfiguration;
Expand Down Expand Up @@ -69,6 +70,8 @@ public static final class Builder extends JobConfiguration.Builder<LoadJobConfig

private List<String> sourceUris;
private String fileSetSpecType;
private String columnNameCharacterMap;

private TableId destinationTable;
private List<String> decimalTargetTypes;
private EncryptionConfiguration destinationEncryptionConfiguration;
Expand Down Expand Up @@ -110,6 +113,7 @@ private Builder(LoadJobConfiguration loadConfiguration) {
this.ignoreUnknownValues = loadConfiguration.ignoreUnknownValues;
this.sourceUris = loadConfiguration.sourceUris;
this.fileSetSpecType = loadConfiguration.fileSetSpecType;
this.columnNameCharacterMap = loadConfiguration.columnNameCharacterMap;
this.schemaUpdateOptions = loadConfiguration.schemaUpdateOptions;
this.autodetect = loadConfiguration.autodetect;
this.destinationEncryptionConfiguration =
Expand Down Expand Up @@ -181,6 +185,9 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
if (loadConfigurationPb.getFileSetSpecType() != null) {
this.fileSetSpecType = loadConfigurationPb.getFileSetSpecType();
}
if (loadConfigurationPb.getColumnNameCharacterMap() != null) {
this.columnNameCharacterMap = loadConfigurationPb.getColumnNameCharacterMap();
}
if (loadConfigurationPb.getSchemaUpdateOptions() != null) {
ImmutableList.Builder<JobInfo.SchemaUpdateOption> schemaUpdateOptionsBuilder =
new ImmutableList.Builder<>();
Expand Down Expand Up @@ -323,6 +330,20 @@ public Builder setFileSetSpecType(String fileSetSpecType) {
return this;
}

/**
 * [Optional] Character map supported for column names in CSV/Parquet loads. Defaults to STRICT
 * and can be overridden by Project Config Service. Using this option with unsupported load
 * formats will result in an error.
 *
 * @param columnNameCharacterMap the character map to apply to column names, e.g. {@code
 *     "STRICT"}, {@code "V1"}, or {@code "V2"}
 * @return this builder, for call chaining
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap">
 *     ColumnNameCharacterMap</a>
 */
public Builder setColumnNameCharacterMap(String columnNameCharacterMap) {
  this.columnNameCharacterMap = columnNameCharacterMap;
  return this;
}

/**
* Defines the list of possible SQL data types to which the source decimal values are converted.
* This list and the precision and the scale parameters of the decimal field determine the
Expand Down Expand Up @@ -421,6 +442,7 @@ private LoadJobConfiguration(Builder builder) {
super(builder);
this.sourceUris = builder.sourceUris;
this.fileSetSpecType = builder.fileSetSpecType;
this.columnNameCharacterMap = builder.columnNameCharacterMap;
this.destinationTable = builder.destinationTable;
this.decimalTargetTypes = builder.decimalTargetTypes;
this.createDisposition = builder.createDisposition;
Expand Down Expand Up @@ -519,6 +541,17 @@ public String getFileSetSpecType() {
return fileSetSpecType;
}

/**
 * Returns the column name character map applied in CSV/Parquet loads, or {@code null} if none
 * was set.
 *
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap">
 *     ColumnNameCharacterMap</a>
 */
public String getColumnNameCharacterMap() {
  return this.columnNameCharacterMap;
}

public List<String> getDecimalTargetTypes() {
return decimalTargetTypes;
}
Expand Down Expand Up @@ -598,6 +631,7 @@ ToStringHelper toStringHelper() {
.add("ignoreUnknownValue", ignoreUnknownValues)
.add("sourceUris", sourceUris)
.add("fileSetSpecType", fileSetSpecType)
.add("columnNameCharacterMap", columnNameCharacterMap)
.add("schemaUpdateOptions", schemaUpdateOptions)
.add("autodetect", autodetect)
.add("timePartitioning", timePartitioning)
Expand Down Expand Up @@ -681,6 +715,9 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
if (fileSetSpecType != null) {
loadConfigurationPb.setFileSetSpecType(fileSetSpecType);
}
if (columnNameCharacterMap != null) {
loadConfigurationPb.setColumnNameCharacterMap(columnNameCharacterMap);
}
if (decimalTargetTypes != null) {
loadConfigurationPb.setDecimalTargetTypes(ImmutableList.copyOf(decimalTargetTypes));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public class LoadJobConfigurationTest {
.setWriteDisposition(WRITE_DISPOSITION)
.setFormatOptions(CSV_OPTIONS)
.setFileSetSpecType("FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH")
.setColumnNameCharacterMap("STRICT")
.setIgnoreUnknownValues(IGNORE_UNKNOWN_VALUES)
.setMaxBadRecords(MAX_BAD_RECORDS)
.setSchema(TABLE_SCHEMA)
Expand Down Expand Up @@ -242,6 +243,7 @@ private void compareLoadJobConfiguration(
assertEquals(expected, value);
assertEquals(expected.hashCode(), value.hashCode());
assertEquals(expected.getFileSetSpecType(), value.getFileSetSpecType());
assertEquals(expected.getColumnNameCharacterMap(), value.getColumnNameCharacterMap());
assertEquals(expected.toString(), value.toString());
assertEquals(expected.getDestinationTable(), value.getDestinationTable());
assertEquals(expected.getDecimalTargetTypes(), value.getDecimalTargetTypes());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,8 @@ public class ITBigQueryTest {
RangePartitioning.newBuilder().setField("IntegerField").setRange(RANGE).build();
private static final String LOAD_FILE = "load.csv";
private static final String LOAD_FILE_LARGE = "load_large.csv";

private static final String LOAD_FILE_FLEXIBLE_COLUMN_NAME = "load_flexible_column_name.csv";
private static final String JSON_LOAD_FILE = "load.json";
private static final String JSON_LOAD_FILE_BQ_RESULTSET = "load_bq_resultset.json";
private static final String JSON_LOAD_FILE_SIMPLE = "load_simple.json";
Expand All @@ -601,6 +603,7 @@ public class ITBigQueryTest {
private static final TableId TABLE_ID_FASTQUERY_BQ_RESULTSET =
TableId.of(DATASET, "fastquery_testing_bq_resultset");
private static final String CSV_CONTENT = "StringValue1\nStringValue2\n";
private static final String CSV_CONTENT_FLEXIBLE_COLUMN = "name,&ampersand\nrow_name,1";

private static final String JSON_CONTENT =
"{"
Expand Down Expand Up @@ -1019,6 +1022,11 @@ public static void beforeClass() throws InterruptedException, IOException {
storage.create(
BlobInfo.newBuilder(BUCKET, LOAD_FILE).setContentType("text/plain").build(),
CSV_CONTENT.getBytes(StandardCharsets.UTF_8));
storage.create(
BlobInfo.newBuilder(BUCKET, LOAD_FILE_FLEXIBLE_COLUMN_NAME)
.setContentType("text/plain")
.build(),
CSV_CONTENT_FLEXIBLE_COLUMN.getBytes(StandardCharsets.UTF_8));
storage.create(
BlobInfo.newBuilder(BUCKET, JSON_LOAD_FILE).setContentType("application/json").build(),
JSON_CONTENT.getBytes(StandardCharsets.UTF_8));
Expand Down Expand Up @@ -6934,4 +6942,60 @@ public void testQueryExportStatistics() throws InterruptedException {
assertEquals(1L, queryStatistics.getExportDataStats().getFileCount().longValue());
assertEquals(3L, queryStatistics.getExportDataStats().getRowCount().longValue());
}

@Test
public void testLoadConfigurationFlexibleColumnName() throws InterruptedException {
  // See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap for
  // mapping. The source CSV has a header column named "&ampersand": V1 mangles the illegal
  // character to an underscore, while V2 preserves it as a flexible column name.
  assertFlexibleColumnNameLoad(
      "V1", "flexible_column_name_data_testing_table_v1", "_ampersand");
  assertFlexibleColumnNameLoad(
      "V2", "flexible_column_name_data_testing_table_v2", "&ampersand");
}

/**
 * Loads {@code LOAD_FILE_FLEXIBLE_COLUMN_NAME} into a fresh table using the given column name
 * character map and asserts the resulting name of the second column.
 *
 * @param characterMap the columnNameCharacterMap to apply (e.g. "V1" or "V2")
 * @param tableName the destination table name, created if needed and deleted afterwards
 * @param expectedColumnName the expected name of column index 1 after mapping
 */
private void assertFlexibleColumnNameLoad(
    String characterMap, String tableName, String expectedColumnName)
    throws InterruptedException {
  TableId tableId = TableId.of(DATASET, tableName);
  try {
    LoadJobConfiguration loadJobConfiguration =
        LoadJobConfiguration.newBuilder(
                tableId,
                "gs://" + BUCKET + "/" + LOAD_FILE_FLEXIBLE_COLUMN_NAME,
                FormatOptions.csv())
            .setCreateDisposition(JobInfo.CreateDisposition.CREATE_IF_NEEDED)
            .setAutodetect(true)
            .setColumnNameCharacterMap(characterMap)
            .build();
    Job job = bigquery.create(JobInfo.of(loadJobConfiguration));
    job = job.waitFor();
    assertNull(job.getStatus().getError());

    Table remoteTable = bigquery.getTable(DATASET, tableName);
    assertNotNull(remoteTable);
    assertEquals(
        expectedColumnName,
        remoteTable.getDefinition().getSchema().getFields().get(1).getName());
  } finally {
    // Always clean up the destination table, even if the load or assertions failed.
    bigquery.delete(tableId);
  }
}
}
Loading