From 2227d98a76c74d94538a57467fe4d72f0a0daeae Mon Sep 17 00:00:00 2001 From: yuezhang Date: Tue, 14 Dec 2021 15:53:30 +0800 Subject: [PATCH 1/4] add SparkRegexMatchPartitionsClusteringPlanStrategy --- ...MatchPartitionsClusteringPlanStrategy.java | 70 +++++++++++++++++ ...SparkRegexMatchClusteringPlanStrategy.java | 75 +++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java create mode 100644 hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java new file mode 100644 index 0000000000000..7dfb50e133eed --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.client.clustering.plan.strategy; + +import static org.apache.hudi.config.HoodieClusteringConfig.CLUSTERING_STRATEGY_PARAM_PREFIX; + +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.table.HoodieSparkCopyOnWriteTable; +import org.apache.hudi.table.HoodieSparkMergeOnReadTable; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Clustering Strategy to filter just specified partitions from regex pattern. + */ +public class SparkRegexMatchPartitionsClusteringPlanStrategy> + extends SparkSizeBasedClusteringPlanStrategy { + private static final Logger LOG = LogManager.getLogger(SparkRegexMatchPartitionsClusteringPlanStrategy.class); + + public static final String CONF_REGEX_PATTERN = CLUSTERING_STRATEGY_PARAM_PREFIX + "cluster.partition.regex.pattern"; + + public SparkRegexMatchPartitionsClusteringPlanStrategy(HoodieSparkCopyOnWriteTable table, + HoodieSparkEngineContext engineContext, + HoodieWriteConfig writeConfig) { + super(table, engineContext, writeConfig); + } + + public SparkRegexMatchPartitionsClusteringPlanStrategy(HoodieSparkMergeOnReadTable table, + HoodieSparkEngineContext engineContext, + HoodieWriteConfig writeConfig) { + super(table, engineContext, writeConfig); + } + + @Override + protected List filterPartitionPaths(List partitionPaths) { + String partitionRegexPattern = getWriteConfig().getProps().getProperty(CONF_REGEX_PATTERN); + + ValidationUtils.checkArgument(!StringUtils.isNullOrEmpty(partitionRegexPattern), + "Please set " + CONF_REGEX_PATTERN + " when using " + this.getClass().getName()); + List filteredPartitions = partitionPaths.stream() + .filter(partition -> Pattern.matches(partitionRegexPattern, partition)) + .collect(Collectors.toList()); + LOG.info("Filtered to the following partitions: " + filteredPartitions); + return filteredPartitions; + } +} diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java new file mode 100644 index 0000000000000..8f339ec11315d --- /dev/null +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.client.clustering.plan.strategy; + +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.config.HoodieClusteringConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.table.HoodieSparkCopyOnWriteTable; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestSparkRegexMatchClusteringPlanStrategy { + @Mock + HoodieSparkCopyOnWriteTable table; + @Mock + HoodieSparkEngineContext context; + HoodieWriteConfig hoodieWriteConfig; + + @BeforeEach + public void setUp() { + Properties props = new Properties(); + props.setProperty("hoodie.clustering.plan.strategy.cluster.partition.regex.pattern", "2021072.*"); + this.hoodieWriteConfig = HoodieWriteConfig + .newBuilder() + .withPath("Fake_Table_Path") + .withClusteringConfig(HoodieClusteringConfig + .newBuilder() + .withClusteringPlanStrategyClass("org.apache.hudi.client.clustering.plan.strategy.SparkRegexMatchPartitionsClusteringPlanStrategy") + .fromProperties(props) + .build()) + .build(); + } + + @Test + public void testFilterPartitionPaths() { + SparkRegexMatchPartitionsClusteringPlanStrategy sg = new SparkRegexMatchPartitionsClusteringPlanStrategy(table, context, hoodieWriteConfig); + + ArrayList fakeTimeBasedPartitionsPath = new ArrayList<>(); + fakeTimeBasedPartitionsPath.add("20210718"); + fakeTimeBasedPartitionsPath.add("20210723"); + fakeTimeBasedPartitionsPath.add("20210716"); + fakeTimeBasedPartitionsPath.add("20210719"); + fakeTimeBasedPartitionsPath.add("20210721"); + + List list = sg.filterPartitionPaths(fakeTimeBasedPartitionsPath); + + assertEquals(2, list.size()); + assertTrue(list.contains("20210721")); + assertTrue(list.contains("20210723")); + } +} From 581a84a74410db60a691c73a8937fd11b8409f22 Mon Sep 17 00:00:00 2001 From: yuezhang Date: Tue, 28 Dec 2021 18:03:23 +0800 Subject: [PATCH 2/4] code review --- .../hudi/config/HoodieClusteringConfig.java | 11 +++ .../apache/hudi/config/HoodieWriteConfig.java | 4 ++ .../PartitionAwareClusteringPlanStrategy.java | 14 ++++ ...PartitionAwareClusteringPlanStrategy.java} | 42 ++++++++--- ...MatchPartitionsClusteringPlanStrategy.java | 70 ------------------- 5 files changed, 60 insertions(+), 81 deletions(-) rename hudi-client/{hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java => hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java} (64%) delete mode 100644 hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java index 4f80b6608f3e6..d787969638bc3 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java @@ -70,6 +70,12 @@ public class HoodieClusteringConfig extends HoodieConfig { .sinceVersion("0.7.0") .withDocumentation("Files smaller than the size specified here are candidates for clustering"); + public static final ConfigProperty PARTITION_REGEX_PATTERN = ConfigProperty + .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "cluster.partition.regex.pattern") + .noDefaultValue() + .sinceVersion("0.11.0") + .withDocumentation("Filter clustering partitions that matched regex pattern"); + public static final ConfigProperty PLAN_STRATEGY_CLASS_NAME = ConfigProperty .key("hoodie.clustering.plan.strategy.class") .defaultValue(SPARK_SIZED_BASED_CLUSTERING_PLAN_STRATEGY) @@ -391,6 +397,11 @@ public Builder withClusteringTargetPartitions(int clusteringTargetPartitions) { return this; } + public Builder withClusteringPartitionRegexPattern(String pattern) { + clusteringConfig.setValue(PARTITION_REGEX_PATTERN, pattern); + return this; + } + public Builder withClusteringSkipPartitionsFromLatest(int clusteringSkipPartitionsFromLatest) { clusteringConfig.setValue(PLAN_STRATEGY_SKIP_PARTITIONS_FROM_LATEST, String.valueOf(clusteringSkipPartitionsFromLatest)); return this; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 3571da17231cb..dfebfd98a0383 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -1226,6 +1226,10 @@ public long getClusteringSmallFileLimit() { return getLong(HoodieClusteringConfig.PLAN_STRATEGY_SMALL_FILE_LIMIT); } + public String getClusteringPartitionFilterRegexPattern() { + return getString(HoodieClusteringConfig.PARTITION_REGEX_PATTERN); + } + public int getClusteringMaxNumGroups() { return getInt(HoodieClusteringConfig.PLAN_STRATEGY_MAX_GROUPS); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java index 4d916362fa4e9..63e7bb6ebf98d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java @@ -27,12 +27,14 @@ import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.util.List; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -66,6 +68,8 @@ public Option generateClusteringPlan() { HoodieWriteConfig config = getWriteConfig(); List partitionPaths = FSUtils.getAllPartitionPaths(getEngineContext(), config.getMetadataConfig(), metaClient.getBasePath()); + // get regex matched partitions if set + partitionPaths = getRegexPatternMatchedPartitions(config, partitionPaths); // filter the partition paths if needed to reduce list status partitionPaths = filterPartitionPaths(partitionPaths); @@ -104,4 +108,14 @@ public Option generateClusteringPlan() { .setPreserveHoodieMetadata(getWriteConfig().isPreserveHoodieCommitMetadata()) .build()); } + + public List getRegexPatternMatchedPartitions(HoodieWriteConfig config, List partitionPaths) { + String pattern = config.getClusteringPartitionFilterRegexPattern(); + if (!StringUtils.isNullOrEmpty(pattern)) { + partitionPaths = partitionPaths.stream() + .filter(partition -> Pattern.matches(pattern, partition)) + .collect(Collectors.toList()); + } + return partitionPaths; + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java similarity index 64% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java rename to hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java index 8f339ec11315d..878d93b89685e 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/clustering/plan/strategy/TestSparkRegexMatchClusteringPlanStrategy.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java @@ -16,28 +16,33 @@ * limitations under the License. */ -package org.apache.hudi.client.clustering.plan.strategy; +package org.apache.hudi.table.action.cluster.strategy; -import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.avro.model.HoodieClusteringGroup; +import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.table.HoodieSparkCopyOnWriteTable; +import org.apache.hudi.table.HoodieTable; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Mock; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Properties; +import java.util.stream.Stream; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -public class TestSparkRegexMatchClusteringPlanStrategy { +public class TestPartitionAwareClusteringPlanStrategy { + @Mock - HoodieSparkCopyOnWriteTable table; + HoodieTable table; @Mock - HoodieSparkEngineContext context; + HoodieEngineContext context; HoodieWriteConfig hoodieWriteConfig; @BeforeEach @@ -46,10 +51,9 @@ public void setUp() { props.setProperty("hoodie.clustering.plan.strategy.cluster.partition.regex.pattern", "2021072.*"); this.hoodieWriteConfig = HoodieWriteConfig .newBuilder() - .withPath("Fake_Table_Path") + .withPath("dummy_Table_Path") .withClusteringConfig(HoodieClusteringConfig .newBuilder() - .withClusteringPlanStrategyClass("org.apache.hudi.client.clustering.plan.strategy.SparkRegexMatchPartitionsClusteringPlanStrategy") .fromProperties(props) .build()) .build(); @@ -57,7 +61,7 @@ public void setUp() { @Test public void testFilterPartitionPaths() { - SparkRegexMatchPartitionsClusteringPlanStrategy sg = new SparkRegexMatchPartitionsClusteringPlanStrategy(table, context, hoodieWriteConfig); + PartitionAwareClusteringPlanStrategy sg = new DummyPartitionAwareClusteringPlanStrategy(table, context, hoodieWriteConfig); ArrayList fakeTimeBasedPartitionsPath = new ArrayList<>(); fakeTimeBasedPartitionsPath.add("20210718"); @@ -66,10 +70,26 @@ public void testFilterPartitionPaths() { fakeTimeBasedPartitionsPath.add("20210719"); fakeTimeBasedPartitionsPath.add("20210721"); - List list = sg.filterPartitionPaths(fakeTimeBasedPartitionsPath); - + List list = sg.getRegexPatternMatchedPartitions(hoodieWriteConfig, fakeTimeBasedPartitionsPath); assertEquals(2, list.size()); assertTrue(list.contains("20210721")); assertTrue(list.contains("20210723")); } + + class DummyPartitionAwareClusteringPlanStrategy extends PartitionAwareClusteringPlanStrategy { + + public DummyPartitionAwareClusteringPlanStrategy(HoodieTable table, HoodieEngineContext engineContext, HoodieWriteConfig writeConfig) { + super(table, engineContext, writeConfig); + } + + @Override + protected Stream buildClusteringGroupsForPartition(String partitionPath, List list) { + return null; + } + + @Override + protected Map getStrategyParams() { + return null; + } + } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java deleted file mode 100644 index 7dfb50e133eed..0000000000000 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkRegexMatchPartitionsClusteringPlanStrategy.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.client.clustering.plan.strategy; - -import static org.apache.hudi.config.HoodieClusteringConfig.CLUSTERING_STRATEGY_PARAM_PREFIX; - -import org.apache.hudi.client.common.HoodieSparkEngineContext; -import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.util.StringUtils; -import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.table.HoodieSparkCopyOnWriteTable; -import org.apache.hudi.table.HoodieSparkMergeOnReadTable; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - -import java.util.List; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -/** - * Clustering Strategy to filter just specified partitions from regex pattern. - */ -public class SparkRegexMatchPartitionsClusteringPlanStrategy> - extends SparkSizeBasedClusteringPlanStrategy { - private static final Logger LOG = LogManager.getLogger(SparkRegexMatchPartitionsClusteringPlanStrategy.class); - - public static final String CONF_REGEX_PATTERN = CLUSTERING_STRATEGY_PARAM_PREFIX + "cluster.partition.regex.pattern"; - - public SparkRegexMatchPartitionsClusteringPlanStrategy(HoodieSparkCopyOnWriteTable table, - HoodieSparkEngineContext engineContext, - HoodieWriteConfig writeConfig) { - super(table, engineContext, writeConfig); - } - - public SparkRegexMatchPartitionsClusteringPlanStrategy(HoodieSparkMergeOnReadTable table, - HoodieSparkEngineContext engineContext, - HoodieWriteConfig writeConfig) { - super(table, engineContext, writeConfig); - } - - @Override - protected List filterPartitionPaths(List partitionPaths) { - String partitionRegexPattern = getWriteConfig().getProps().getProperty(CONF_REGEX_PATTERN); - - ValidationUtils.checkArgument(!StringUtils.isNullOrEmpty(partitionRegexPattern), - "Please set " + CONF_REGEX_PATTERN + " when using " + this.getClass().getName()); - List filteredPartitions = partitionPaths.stream() - .filter(partition -> Pattern.matches(partitionRegexPattern, partition)) - .collect(Collectors.toList()); - LOG.info("Filtered to the following partitions: " + filteredPartitions); - return filteredPartitions; - } -} From b044a184215c5e858c7bee055466c30c4c651a52 Mon Sep 17 00:00:00 2001 From: yuezhang Date: Mon, 10 Jan 2022 16:15:34 +0800 Subject: [PATCH 3/4] code review --- .../java/org/apache/hudi/config/HoodieClusteringConfig.java | 2 +- .../strategy/TestPartitionAwareClusteringPlanStrategy.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java index 39f6033623e84..057b4a6f61299 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java @@ -88,7 +88,7 @@ public class HoodieClusteringConfig extends HoodieConfig { .withDocumentation("Files smaller than the size specified here are candidates for clustering"); public static final ConfigProperty PARTITION_REGEX_PATTERN = ConfigProperty - .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "cluster.partition.regex.pattern") + .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "partition.regex.pattern") .noDefaultValue() .sinceVersion("0.11.0") .withDocumentation("Filter clustering partitions that matched regex pattern"); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java index 878d93b89685e..717b8b4f9ba8a 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java @@ -48,7 +48,7 @@ public class TestPartitionAwareClusteringPlanStrategy { @BeforeEach public void setUp() { Properties props = new Properties(); - props.setProperty("hoodie.clustering.plan.strategy.cluster.partition.regex.pattern", "2021072.*"); + props.setProperty("hoodie.clustering.plan.strategy.partition.regex.pattern", "2021072.*"); this.hoodieWriteConfig = HoodieWriteConfig .newBuilder() .withPath("dummy_Table_Path") @@ -61,7 +61,7 @@ public void setUp() { @Test public void testFilterPartitionPaths() { - PartitionAwareClusteringPlanStrategy sg = new DummyPartitionAwareClusteringPlanStrategy(table, context, hoodieWriteConfig); + PartitionAwareClusteringPlanStrategy strategyTestRegexPattern = new DummyPartitionAwareClusteringPlanStrategy(table, context, hoodieWriteConfig); ArrayList fakeTimeBasedPartitionsPath = new ArrayList<>(); fakeTimeBasedPartitionsPath.add("20210718"); @@ -70,7 +70,7 @@ public void testFilterPartitionPaths() { fakeTimeBasedPartitionsPath.add("20210719"); fakeTimeBasedPartitionsPath.add("20210721"); - List list = sg.getRegexPatternMatchedPartitions(hoodieWriteConfig, fakeTimeBasedPartitionsPath); + List list = strategyTestRegexPattern.getRegexPatternMatchedPartitions(hoodieWriteConfig, fakeTimeBasedPartitionsPath); assertEquals(2, list.size()); assertTrue(list.contains("20210721")); assertTrue(list.contains("20210723")); From a91258dc075469c45cca1b878ef1c1df892c8c1a Mon Sep 17 00:00:00 2001 From: yuezhang Date: Mon, 10 Jan 2022 16:41:24 +0800 Subject: [PATCH 4/4] code review --- .../strategy/TestPartitionAwareClusteringPlanStrategy.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java index 717b8b4f9ba8a..a053a9611050c 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestPartitionAwareClusteringPlanStrategy.java @@ -65,6 +65,7 @@ public void testFilterPartitionPaths() { ArrayList fakeTimeBasedPartitionsPath = new ArrayList<>(); fakeTimeBasedPartitionsPath.add("20210718"); + fakeTimeBasedPartitionsPath.add("20210715"); fakeTimeBasedPartitionsPath.add("20210723"); fakeTimeBasedPartitionsPath.add("20210716"); fakeTimeBasedPartitionsPath.add("20210719");