diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java index 2b3de27595540..18d6b73e2d318 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java @@ -169,6 +169,10 @@ public static Schema getComplexEvolvedSchema() throws IOException { return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/complex-test-evolved.avsc")); } + public static Schema getTimestampEvolvedSchema() throws IOException { + return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/timestamp-test-evolved.avsc")); + } + public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String commitTime, String fileId) throws IOException { TestRecord record = new TestRecord(commitTime, recordNumber, fileId); diff --git a/hudi-common/src/test/resources/timestamp-test-evolved.avsc b/hudi-common/src/test/resources/timestamp-test-evolved.avsc new file mode 100644 index 0000000000000..421c6722cd842 --- /dev/null +++ b/hudi-common/src/test/resources/timestamp-test-evolved.avsc @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +{ + "namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "field1", "type": ["null", "string"], "default": null}, + {"name": "createTime", "type": ["null", "string"], "default": null} + ] +} \ No newline at end of file diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java index 7964a45dafa6a..6ed493d74b5e9 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java @@ -51,6 +51,10 @@ enum TimestampType implements Serializable { private final String outputDateFormat; + // TimeZone detailed settings reference + // https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html + private final TimeZone timeZone; + /** * Supported configs. */ @@ -62,6 +66,8 @@ static class Config { "hoodie.deltastreamer.keygen.timebased.input.dateformat"; private static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.dateformat"; + private static final String TIMESTAMP_TIMEZONE_FORMAT_PROP = + "hoodie.deltastreamer.keygen.timebased.timezone"; } public TimestampBasedKeyGenerator(TypedProperties config) { @@ -70,12 +76,13 @@ public TimestampBasedKeyGenerator(TypedProperties config) { Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP)); this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP)); this.outputDateFormat = config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); + this.timeZone = TimeZone.getTimeZone(config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT")); if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) { DataSourceUtils.checkRequiredProperties(config, Collections.singletonList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); this.inputDateFormat = new SimpleDateFormat(config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); - this.inputDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + this.inputDateFormat.setTimeZone(timeZone); } } @@ -83,7 +90,7 @@ public TimestampBasedKeyGenerator(TypedProperties config) { public HoodieKey getKey(GenericRecord record) { Object partitionVal = DataSourceUtils.getNestedFieldVal(record, partitionPathField); SimpleDateFormat partitionPathFormat = new SimpleDateFormat(outputDateFormat); - partitionPathFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + partitionPathFormat.setTimeZone(timeZone); try { long unixTime; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java new file mode 100644 index 0000000000000..cb0c82244ef0b --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.util.SchemaTestUtil; +import org.apache.hudi.common.util.TypedProperties; +import org.apache.hudi.utilities.keygen.TimestampBasedKeyGenerator; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + +public class TestTimestampBasedKeyGenerator { + private Schema schema; + private GenericRecord baseRecord; + private TypedProperties properties = new TypedProperties(); + + @Before + public void initialize() throws IOException { + schema = SchemaTestUtil.getTimestampEvolvedSchema(); + baseRecord = SchemaTestUtil + .generateAvroRecordFromJson(schema, 1, "001", "f1"); + + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "field1"); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "createTime"); + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "false"); + } + + private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone) { + properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", timestampType); + properties.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", dateFormat); + properties.setProperty("hoodie.deltastreamer.keygen.timebased.timezone", timezone); + return properties; + } + + @Test + public void testTimestampBasedKeyGenerator() { + // timezone is GMT+8:00 + baseRecord.put("createTime", 1578283932000L); + properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00"); + HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk1.getPartitionPath(), "2020-01-06 12"); + + // timezone is GMT + properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT"); + HoodieKey hk2 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk2.getPartitionPath(), "2020-01-06 04"); + + // timestamp is DATE_STRING, timezone is GMT+8:00 + baseRecord.put("createTime", "2020-01-06 12:12:12"); + properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00"); + properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss"); + HoodieKey hk3 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk3.getPartitionPath(), "2020-01-06 12"); + + // timezone is GMT + properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT"); + HoodieKey hk4 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk4.getPartitionPath(), "2020-01-06 12"); + } +}