Skip to content

Commit 736a940

Browse files
[HUDI-1274] Make hive synchronization supports hourly partition (#2122)
1 parent e109a61 commit 736a940

File tree

2 files changed

+105
-0
lines changed

2 files changed

+105
-0
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hudi.hive;
20+
21+
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import java.util.Collections;
import java.util.List;
27+
28+
/**
29+
* HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not straight forward and
30+
* requires a pluggable implementation to extract the partition value from HDFS path.
31+
* <p>
32+
* This implementation extracts datestr=yyyy-mm-dd-HH from path of type /yyyy/mm/dd/HH
33+
*/
34+
public class SlashEncodedHourPartitionValueExtractor implements PartitionValueExtractor {
35+
36+
private static final long serialVersionUID = 1L;
37+
private transient DateTimeFormatter dtfOut;
38+
39+
public SlashEncodedHourPartitionValueExtractor() {
40+
this.dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd-HH");
41+
}
42+
43+
private DateTimeFormatter getDtfOut() {
44+
if (dtfOut == null) {
45+
dtfOut = DateTimeFormat.forPattern("yyyy-MM-dd-HH");
46+
}
47+
return dtfOut;
48+
}
49+
50+
@Override
51+
public List<String> extractPartitionValuesInPath(String partitionPath) {
52+
// partition path is expected to be in this format yyyy/mm/dd/HH
53+
String[] splits = partitionPath.split("/");
54+
if (splits.length != 4) {
55+
throw new IllegalArgumentException("Partition path " + partitionPath + " is not in the form yyyy/mm/dd/HH");
56+
}
57+
//Hive style partitions need to contain '='
58+
int year = Integer.parseInt(splits[0].contains("=") ? splits[0].split("=")[1] : splits[0]);
59+
int mm = Integer.parseInt(splits[1].contains("=") ? splits[1].split("=")[1] : splits[1]);
60+
int dd = Integer.parseInt(splits[2].contains("=") ? splits[2].split("=")[1] : splits[2]);
61+
int hh = Integer.parseInt(splits[3].contains("=") ? splits[3].split("=")[1] : splits[3]);
62+
63+
DateTime dateTime = new DateTime(year, mm, dd, hh, 0);
64+
65+
return Collections.singletonList(getDtfOut().print(dateTime));
66+
}
67+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hudi.hive;
20+
21+
import org.junit.jupiter.api.Test;
22+
import java.util.ArrayList;
23+
import java.util.List;
24+
25+
import static org.junit.jupiter.api.Assertions.assertEquals;
26+
import static org.junit.jupiter.api.Assertions.assertThrows;
27+
28+
public class TestPartitionValueExtractor {
29+
@Test
30+
public void testHourPartition() {
31+
SlashEncodedHourPartitionValueExtractor hourPartition = new SlashEncodedHourPartitionValueExtractor();
32+
List<String> list = new ArrayList<>();
33+
list.add("2020-12-20-01");
34+
assertEquals(hourPartition.extractPartitionValuesInPath("2020/12/20/01"), list);
35+
assertThrows(IllegalArgumentException.class, () -> hourPartition.extractPartitionValuesInPath("2020/12/20"));
36+
assertEquals(hourPartition.extractPartitionValuesInPath("update_time=2020/12/20/01"), list);
37+
}
38+
}

0 commit comments

Comments
 (0)