Skip to content

Commit

Permalink
feat: add columnNameCharacterMap to LoadJobConfiguration (#3356)
Browse files Browse the repository at this point in the history
* feat: add columnNameCharacterMap to LoadJobConfiguration

* fix: unintentional empty line delete

* fix: lint/format
  • Loading branch information
PhongChuong authored Jun 22, 2024
1 parent 7b7e52b commit 2f3cbe3
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load

private final List<String> sourceUris;
private final String fileSetSpecType;
private final String columnNameCharacterMap;
private final TableId destinationTable;
private final List<String> decimalTargetTypes;
private final EncryptionConfiguration destinationEncryptionConfiguration;
Expand Down Expand Up @@ -69,6 +70,8 @@ public static final class Builder extends JobConfiguration.Builder<LoadJobConfig

private List<String> sourceUris;
private String fileSetSpecType;
private String columnNameCharacterMap;

private TableId destinationTable;
private List<String> decimalTargetTypes;
private EncryptionConfiguration destinationEncryptionConfiguration;
Expand Down Expand Up @@ -110,6 +113,7 @@ private Builder(LoadJobConfiguration loadConfiguration) {
this.ignoreUnknownValues = loadConfiguration.ignoreUnknownValues;
this.sourceUris = loadConfiguration.sourceUris;
this.fileSetSpecType = loadConfiguration.fileSetSpecType;
this.columnNameCharacterMap = loadConfiguration.columnNameCharacterMap;
this.schemaUpdateOptions = loadConfiguration.schemaUpdateOptions;
this.autodetect = loadConfiguration.autodetect;
this.destinationEncryptionConfiguration =
Expand Down Expand Up @@ -181,6 +185,9 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
if (loadConfigurationPb.getFileSetSpecType() != null) {
this.fileSetSpecType = loadConfigurationPb.getFileSetSpecType();
}
if (loadConfigurationPb.getColumnNameCharacterMap() != null) {
this.columnNameCharacterMap = loadConfigurationPb.getColumnNameCharacterMap();
}
if (loadConfigurationPb.getSchemaUpdateOptions() != null) {
ImmutableList.Builder<JobInfo.SchemaUpdateOption> schemaUpdateOptionsBuilder =
new ImmutableList.Builder<>();
Expand Down Expand Up @@ -323,6 +330,20 @@ public Builder setFileSetSpecType(String fileSetSpecType) {
return this;
}

/**
 * [Optional] Character map supported for column names in CSV/Parquet loads. Defaults to STRICT
 * and can be overridden by Project Config Service. Using this option with unsupported load
 * formats will result in an error.
 *
 * @param columnNameCharacterMap the character map name, e.g. {@code "STRICT"}, {@code "V1"} or
 *     {@code "V2"}
 * @return this {@code Builder}, for call chaining
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap">
 *     ColumnNameCharacterMap</a>
 */
public Builder setColumnNameCharacterMap(String columnNameCharacterMap) {
this.columnNameCharacterMap = columnNameCharacterMap;
return this;
}

/**
* Defines the list of possible SQL data types to which the source decimal values are converted.
* This list and the precision and the scale parameters of the decimal field determine the
Expand Down Expand Up @@ -421,6 +442,7 @@ private LoadJobConfiguration(Builder builder) {
super(builder);
this.sourceUris = builder.sourceUris;
this.fileSetSpecType = builder.fileSetSpecType;
this.columnNameCharacterMap = builder.columnNameCharacterMap;
this.destinationTable = builder.destinationTable;
this.decimalTargetTypes = builder.decimalTargetTypes;
this.createDisposition = builder.createDisposition;
Expand Down Expand Up @@ -519,6 +541,17 @@ public String getFileSetSpecType() {
return fileSetSpecType;
}

/**
 * Returns the column name character map used in CSV/Parquet loads.
 *
 * @return the character map name set on this configuration, or {@code null} if none was set
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap">
 *     ColumnNameCharacterMap</a>
 */
public String getColumnNameCharacterMap() {
return columnNameCharacterMap;
}

public List<String> getDecimalTargetTypes() {
return decimalTargetTypes;
}
Expand Down Expand Up @@ -598,6 +631,7 @@ ToStringHelper toStringHelper() {
.add("ignoreUnknownValue", ignoreUnknownValues)
.add("sourceUris", sourceUris)
.add("fileSetSpecType", fileSetSpecType)
.add("columnNameCharacterMap", columnNameCharacterMap)
.add("schemaUpdateOptions", schemaUpdateOptions)
.add("autodetect", autodetect)
.add("timePartitioning", timePartitioning)
Expand Down Expand Up @@ -681,6 +715,9 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
if (fileSetSpecType != null) {
loadConfigurationPb.setFileSetSpecType(fileSetSpecType);
}
if (columnNameCharacterMap != null) {
loadConfigurationPb.setColumnNameCharacterMap(columnNameCharacterMap);
}
if (decimalTargetTypes != null) {
loadConfigurationPb.setDecimalTargetTypes(ImmutableList.copyOf(decimalTargetTypes));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public class LoadJobConfigurationTest {
.setWriteDisposition(WRITE_DISPOSITION)
.setFormatOptions(CSV_OPTIONS)
.setFileSetSpecType("FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH")
.setColumnNameCharacterMap("STRICT")
.setIgnoreUnknownValues(IGNORE_UNKNOWN_VALUES)
.setMaxBadRecords(MAX_BAD_RECORDS)
.setSchema(TABLE_SCHEMA)
Expand Down Expand Up @@ -242,6 +243,7 @@ private void compareLoadJobConfiguration(
assertEquals(expected, value);
assertEquals(expected.hashCode(), value.hashCode());
assertEquals(expected.getFileSetSpecType(), value.getFileSetSpecType());
assertEquals(expected.getColumnNameCharacterMap(), value.getColumnNameCharacterMap());
assertEquals(expected.toString(), value.toString());
assertEquals(expected.getDestinationTable(), value.getDestinationTable());
assertEquals(expected.getDecimalTargetTypes(), value.getDecimalTargetTypes());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,8 @@ public class ITBigQueryTest {
RangePartitioning.newBuilder().setField("IntegerField").setRange(RANGE).build();
private static final String LOAD_FILE = "load.csv";
private static final String LOAD_FILE_LARGE = "load_large.csv";

private static final String LOAD_FILE_FLEXIBLE_COLUMN_NAME = "load_flexible_column_name.csv";
private static final String JSON_LOAD_FILE = "load.json";
private static final String JSON_LOAD_FILE_BQ_RESULTSET = "load_bq_resultset.json";
private static final String JSON_LOAD_FILE_SIMPLE = "load_simple.json";
Expand All @@ -601,6 +603,7 @@ public class ITBigQueryTest {
private static final TableId TABLE_ID_FASTQUERY_BQ_RESULTSET =
TableId.of(DATASET, "fastquery_testing_bq_resultset");
private static final String CSV_CONTENT = "StringValue1\nStringValue2\n";
private static final String CSV_CONTENT_FLEXIBLE_COLUMN = "name,&ampersand\nrow_name,1";

private static final String JSON_CONTENT =
"{"
Expand Down Expand Up @@ -1019,6 +1022,11 @@ public static void beforeClass() throws InterruptedException, IOException {
storage.create(
BlobInfo.newBuilder(BUCKET, LOAD_FILE).setContentType("text/plain").build(),
CSV_CONTENT.getBytes(StandardCharsets.UTF_8));
storage.create(
BlobInfo.newBuilder(BUCKET, LOAD_FILE_FLEXIBLE_COLUMN_NAME)
.setContentType("text/plain")
.build(),
CSV_CONTENT_FLEXIBLE_COLUMN.getBytes(StandardCharsets.UTF_8));
storage.create(
BlobInfo.newBuilder(BUCKET, JSON_LOAD_FILE).setContentType("application/json").build(),
JSON_CONTENT.getBytes(StandardCharsets.UTF_8));
Expand Down Expand Up @@ -6934,4 +6942,60 @@ public void testQueryExportStatistics() throws InterruptedException {
assertEquals(1L, queryStatistics.getExportDataStats().getFileCount().longValue());
assertEquals(3L, queryStatistics.getExportDataStats().getRowCount().longValue());
}

@Test
public void testLoadConfigurationFlexibleColumnName() throws InterruptedException {
  // See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap for
  // mapping. The source CSV (CSV_CONTENT_FLEXIBLE_COLUMN) has header "name,&ampersand":
  // V1 rewrites the unsupported character to an underscore, V2 preserves it.
  assertFlexibleColumnNameLoad("flexible_column_name_data_testing_table_v1", "V1", "_ampersand");
  assertFlexibleColumnNameLoad("flexible_column_name_data_testing_table_v2", "V2", "&ampersand");
}

/**
 * Loads the flexible-column-name CSV into a fresh table using the given column name character
 * map, then asserts the second column of the resulting schema has the expected (possibly
 * rewritten) name. The table is deleted afterwards, even on failure.
 *
 * @param tableName name of the destination table to create in {@code DATASET}
 * @param characterMap columnNameCharacterMap value to apply (e.g. "V1", "V2")
 * @param expectedColumnName expected name of column index 1 after the load
 */
private void assertFlexibleColumnNameLoad(
    String tableName, String characterMap, String expectedColumnName)
    throws InterruptedException {
  TableId tableId = TableId.of(DATASET, tableName);
  try {
    LoadJobConfiguration loadJobConfiguration =
        LoadJobConfiguration.newBuilder(
                tableId,
                "gs://" + BUCKET + "/" + LOAD_FILE_FLEXIBLE_COLUMN_NAME,
                FormatOptions.csv())
            .setCreateDisposition(JobInfo.CreateDisposition.CREATE_IF_NEEDED)
            .setAutodetect(true)
            .setColumnNameCharacterMap(characterMap)
            .build();
    Job job = bigquery.create(JobInfo.of(loadJobConfiguration));
    job = job.waitFor();
    assertNull(job.getStatus().getError());

    Table remoteTable = bigquery.getTable(DATASET, tableName);
    assertNotNull(remoteTable);
    // Index 1 is the "&ampersand" header column from the loaded CSV.
    assertEquals(
        expectedColumnName,
        remoteTable.getDefinition().getSchema().getFields().get(1).getName());
  } finally {
    bigquery.delete(tableId);
  }
}
}

0 comments on commit 2f3cbe3

Please sign in to comment.