diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/HivePartitioningOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/HivePartitioningOptions.java
new file mode 100644
index 000000000..6c831db0d
--- /dev/null
+++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/HivePartitioningOptions.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.bigquery;
+
+import com.google.common.base.MoreObjects;
+import java.util.Objects;
+
+/**
+ * Options that control how hive-partitioned source data is read: the partitioning mode and the
+ * common source URI prefix. Currently supported source formats include: AVRO, CSV, JSON, ORC and
+ * Parquet.
+ */
+public final class HivePartitioningOptions {
+
+  private final String mode;
+  private final String sourceUriPrefix;
+
+  /** A builder for {@link HivePartitioningOptions} objects. */
+  public static final class Builder {
+
+    private String mode;
+    private String sourceUriPrefix;
+
+    private Builder() {}
+
+    private Builder(HivePartitioningOptions options) {
+      this.mode = options.mode;
+      this.sourceUriPrefix = options.sourceUriPrefix;
+    }
+
+    /**
+     * [Optional] When set, what mode of hive partitioning to use when reading data. Two modes are
+     * supported. (1) AUTO: automatically infer partition key name(s) and type(s). (2) STRINGS:
+     * automatically infer partition key name(s). All types are interpreted as strings. Not all
+     * storage formats support hive partitioning. Requesting hive partitioning on an unsupported
+     * format will lead to an error. Currently supported types include: AVRO, CSV, JSON, ORC and
+     * Parquet.
+     */
+    public Builder setMode(String mode) {
+      this.mode = mode;
+      return this;
+    }
+
+    /**
+     * [Optional] When hive partition detection is requested, a common prefix for all source uris
+     * should be supplied. The prefix must end immediately before the partition key encoding begins.
+     * For example, consider files following this data layout.
+     * gs://bucket/path_to_table/dt=2019-01-01/country=BR/id=7/file.avro
+     * gs://bucket/path_to_table/dt=2018-12-31/country=CA/id=3/file.avro When hive partitioning is
+     * requested with either AUTO or STRINGS detection, the common prefix can be either of
+     * gs://bucket/path_to_table or gs://bucket/path_to_table/ (trailing slash does not matter).
+     */
+    public Builder setSourceUriPrefix(String sourceUriPrefix) {
+      this.sourceUriPrefix = sourceUriPrefix;
+      return this;
+    }
+
+    /** Creates a {@link HivePartitioningOptions} object. */
+    public HivePartitioningOptions build() {
+      return new HivePartitioningOptions(this);
+    }
+  }
+
+  private HivePartitioningOptions(Builder builder) {
+    this.mode = builder.mode;
+    this.sourceUriPrefix = builder.sourceUriPrefix;
+  }
+
+  /** Returns the mode of hive partitioning. */
+  public String getMode() {
+    return mode;
+  }
+
+  /** Returns the sourceUriPrefix of hive partitioning. */
+  public String getSourceUriPrefix() {
+    return sourceUriPrefix;
+  }
+
+  /** Returns a builder pre-populated with the values of this {@link HivePartitioningOptions}. */
+  public Builder toBuilder() {
+    return new Builder(this);
+  }
+
+  /** Returns a builder for the {@link HivePartitioningOptions} object. */
+  public static Builder newBuilder() {
+    return new Builder();
+  }
+
+  @Override
+  public String toString() {
+    return MoreObjects.toStringHelper(this)
+        .add("mode", mode)
+        .add("sourceUriPrefix", sourceUriPrefix)
+        .toString();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == this) {
+      return true;
+    }
+    if (!(obj instanceof HivePartitioningOptions)) {
+      return false;
+    }
+    HivePartitioningOptions other = (HivePartitioningOptions) obj;
+    return Objects.equals(mode, other.mode)
+        && Objects.equals(sourceUriPrefix, other.sourceUriPrefix);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(mode, sourceUriPrefix);
+  }
+
+  /** Converts this object to its BigQuery API model representation. */
+  com.google.api.services.bigquery.model.HivePartitioningOptions toPb() {
+    com.google.api.services.bigquery.model.HivePartitioningOptions options =
+        new com.google.api.services.bigquery.model.HivePartitioningOptions();
+    options.setMode(mode);
+    options.setSourceUriPrefix(sourceUriPrefix);
+    return options;
+  }
+
+  /** Creates an instance from the API model object, copying only its non-null fields. */
+  static HivePartitioningOptions fromPb(
+      com.google.api.services.bigquery.model.HivePartitioningOptions options) {
+    Builder builder = newBuilder();
+    if (options.getMode() != null) {
+      builder.setMode(options.getMode());
+    }
+    if (options.getSourceUriPrefix() != null) {
+      builder.setSourceUriPrefix(options.getSourceUriPrefix());
+    }
+    return builder.build();
+  }
+}
diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index c68e3f3b9..0eae67bd6 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -54,6 +54,7 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load private final Map labels; private final Long jobTimeoutMs; private final RangePartitioning rangePartitioning; + private final HivePartitioningOptions hivePartitioningOptions; public static final class Builder extends JobConfiguration.Builder implements LoadConfiguration.Builder { @@ -77,6 +78,7 @@ public static final class Builder extends JobConfiguration.Builder labels; private Long jobTimeoutMs; private RangePartitioning rangePartitioning; + private 
HivePartitioningOptions hivePartitioningOptions; private Builder() { super(Type.LOAD); @@ -103,6 +105,7 @@ private Builder(LoadJobConfiguration loadConfiguration) { this.labels = loadConfiguration.labels; this.jobTimeoutMs = loadConfiguration.jobTimeoutMs; this.rangePartitioning = loadConfiguration.rangePartitioning; + this.hivePartitioningOptions = loadConfiguration.hivePartitioningOptions; } private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) { @@ -186,6 +189,10 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur this.rangePartitioning = RangePartitioning.fromPb(loadConfigurationPb.getRangePartitioning()); } + if (loadConfigurationPb.getHivePartitioningOptions() != null) { + this.hivePartitioningOptions = + HivePartitioningOptions.fromPb(loadConfigurationPb.getHivePartitioningOptions()); + } } @Override @@ -319,6 +326,11 @@ public Builder setRangePartitioning(RangePartitioning rangePartitioning) { return this; } + public Builder setHivePartitioningOptions(HivePartitioningOptions hivePartitioningOptions) { + this.hivePartitioningOptions = hivePartitioningOptions; + return this; + } + @Override public LoadJobConfiguration build() { return new LoadJobConfiguration(this); @@ -345,6 +357,7 @@ private LoadJobConfiguration(Builder builder) { this.labels = builder.labels; this.jobTimeoutMs = builder.jobTimeoutMs; this.rangePartitioning = builder.rangePartitioning; + this.hivePartitioningOptions = builder.hivePartitioningOptions; } @Override @@ -452,6 +465,10 @@ public RangePartitioning getRangePartitioning() { return rangePartitioning; } + public HivePartitioningOptions getHivePartitioningOptions() { + return hivePartitioningOptions; + } + @Override public Builder toBuilder() { return new Builder(this); @@ -477,7 +494,8 @@ ToStringHelper toStringHelper() { .add("useAvroLogicalTypes", useAvroLogicalTypes) .add("labels", labels) .add("jobTimeoutMs", jobTimeoutMs) - .add("rangePartitioning", 
rangePartitioning); + .add("rangePartitioning", rangePartitioning) + .add("hivePartitioningOptions", hivePartitioningOptions); } @Override @@ -570,6 +588,9 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() { if (rangePartitioning != null) { loadConfigurationPb.setRangePartitioning(rangePartitioning.toPb()); } + if (hivePartitioningOptions != null) { + loadConfigurationPb.setHivePartitioningOptions(hivePartitioningOptions.toPb()); + } jobConfiguration.setLoad(loadConfigurationPb); return jobConfiguration; }
diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/HivePartitioningOptionsTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/HivePartitioningOptionsTest.java
new file mode 100644
index 000000000..6c2aa5427
--- /dev/null
+++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/HivePartitioningOptionsTest.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.cloud.bigquery;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import org.junit.Test;
+
+/** Unit tests for {@link HivePartitioningOptions}. */
+public class HivePartitioningOptionsTest {
+
+  // "STRINGS" is one of the two modes documented on Builder#setMode (the other is "AUTO").
+  private static final String MODE = "STRINGS";
+  private static final String SOURCE_URI_PREFIX = "gs://bucket/path_to_table";
+  private static final HivePartitioningOptions HIVE_PARTITIONING_OPTIONS =
+      HivePartitioningOptions.newBuilder()
+          .setMode(MODE)
+          .setSourceUriPrefix(SOURCE_URI_PREFIX)
+          .build();
+
+  @Test
+  public void testToBuilder() {
+    compareHivePartitioningOptions(
+        HIVE_PARTITIONING_OPTIONS, HIVE_PARTITIONING_OPTIONS.toBuilder().build());
+    HivePartitioningOptions options = HIVE_PARTITIONING_OPTIONS.toBuilder().setMode("AUTO").build();
+    assertThat(options.getMode()).isEqualTo("AUTO");
+    options = HIVE_PARTITIONING_OPTIONS.toBuilder().setMode(MODE).build();
+    compareHivePartitioningOptions(HIVE_PARTITIONING_OPTIONS, options);
+  }
+
+  @Test
+  public void testToBuilderIncomplete() {
+    HivePartitioningOptions options = HivePartitioningOptions.newBuilder().build();
+    compareHivePartitioningOptions(options, options.toBuilder().build());
+  }
+
+  @Test
+  public void testBuilder() {
+    assertThat(HIVE_PARTITIONING_OPTIONS.getMode()).isEqualTo(MODE);
+    assertThat(HIVE_PARTITIONING_OPTIONS.getSourceUriPrefix()).isEqualTo(SOURCE_URI_PREFIX);
+  }
+
+  @Test
+  public void testToAndFromPb() {
+    compareHivePartitioningOptions(
+        HIVE_PARTITIONING_OPTIONS,
+        HivePartitioningOptions.fromPb(HIVE_PARTITIONING_OPTIONS.toPb()));
+  }
+
+  @Test
+  public void testToAndFromPbIncomplete() {
+    HivePartitioningOptions options = HivePartitioningOptions.newBuilder().build();
+    compareHivePartitioningOptions(options, HivePartitioningOptions.fromPb(options.toPb()));
+  }
+
+  /** Asserts that both objects agree on every getter, on toString() and on hashCode(). */
+  private void compareHivePartitioningOptions(
+      HivePartitioningOptions expected, HivePartitioningOptions value) {
+    assertThat(value.getMode()).isEqualTo(expected.getMode());
+    assertThat(value.getSourceUriPrefix()).isEqualTo(expected.getSourceUriPrefix());
+    assertThat(value.toString()).isEqualTo(expected.toString());
+    assertThat(value.hashCode()).isEqualTo(expected.hashCode());
+  }
+}