-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-3935] Adding config to fallback to enabled Partition Values extraction from Partition path #5377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[HUDI-3935] Adding config to fallback to enabled Partition Values extraction from Partition path #5377
Changes from all commits
5bce2e1
0a16db7
e63b627
d67ba65
1c27cac
d5591b5
662af81
274bdd0
8554a83
3ec7e0f
d46188d
2596fc2
1a865d9
51333eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,14 +18,16 @@ | |
| package org.apache.hudi | ||
|
|
||
| import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, QUERY_TYPE_SNAPSHOT_OPT_VAL} | ||
| import org.apache.hudi.HoodieConversionUtils.toScalaOption | ||
| import org.apache.hudi.common.config.{ConfigProperty, HoodieConfig} | ||
| import org.apache.hudi.common.fs.ConsistencyGuardConfig | ||
| import org.apache.hudi.common.model.{HoodieTableType, WriteOperationType} | ||
| import org.apache.hudi.common.table.HoodieTableConfig | ||
| import org.apache.hudi.common.util.Option | ||
| import org.apache.hudi.common.util.ValidationUtils.checkState | ||
| import org.apache.hudi.config.{HoodieClusteringConfig, HoodieWriteConfig} | ||
| import org.apache.hudi.hive.util.ConfigUtils | ||
| import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool, MultiPartKeysValueExtractor, NonPartitionedExtractor, SlashEncodedDayPartitionValueExtractor} | ||
| import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool} | ||
| import org.apache.hudi.keygen.constant.KeyGeneratorOptions | ||
| import org.apache.hudi.keygen.{ComplexKeyGenerator, CustomKeyGenerator, NonpartitionedKeyGenerator, SimpleKeyGenerator} | ||
| import org.apache.hudi.sync.common.HoodieSyncConfig | ||
|
|
@@ -45,6 +47,7 @@ import scala.language.implicitConversions | |
| * Options supported for reading hoodie tables. | ||
| */ | ||
| object DataSourceReadOptions { | ||
| import DataSourceOptionsHelper._ | ||
|
|
||
| val QUERY_TYPE_SNAPSHOT_OPT_VAL = "snapshot" | ||
| val QUERY_TYPE_READ_OPTIMIZED_OPT_VAL = "read_optimized" | ||
|
|
@@ -124,6 +127,15 @@ object DataSourceReadOptions { | |
| .withDocumentation("Enables data-skipping allowing queries to leverage indexes to reduce the search space by " + | ||
| "skipping over files") | ||
|
|
||
| val EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH: ConfigProperty[Boolean] = | ||
| ConfigProperty.key("hoodie.datasource.read.extract.partition.values.from.path") | ||
| .defaultValue(false) | ||
| .sinceVersion("0.11.0") | ||
| .withDocumentation("When set to true, values for partition columns (partition values) will be extracted" + | ||
| " from physical partition path (default Spark behavior). When set to false partition values will be" + | ||
| " read from the data file (in Hudi partition columns are persisted by default)." + | ||
| " This config is a fallback allowing to preserve existing behavior, and should not be used otherwise.") | ||
|
|
||
| val INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES: ConfigProperty[String] = ConfigProperty | ||
| .key("hoodie.datasource.read.incr.fallback.fulltablescan.enable") | ||
| .defaultValue("false") | ||
|
|
@@ -185,6 +197,8 @@ object DataSourceReadOptions { | |
| */ | ||
| object DataSourceWriteOptions { | ||
|
|
||
| import DataSourceOptionsHelper._ | ||
|
|
||
| val BULK_INSERT_OPERATION_OPT_VAL = WriteOperationType.BULK_INSERT.value | ||
| val INSERT_OPERATION_OPT_VAL = WriteOperationType.INSERT.value | ||
| val UPSERT_OPERATION_OPT_VAL = WriteOperationType.UPSERT.value | ||
|
|
@@ -471,10 +485,7 @@ object DataSourceWriteOptions { | |
| .sinceVersion("0.9.0") | ||
| .withDocumentation("This class is used by kafka client to deserialize the records") | ||
|
|
||
| val DROP_PARTITION_COLUMNS: ConfigProperty[Boolean] = ConfigProperty | ||
| .key(HoodieTableConfig.DROP_PARTITION_COLUMNS.key()) | ||
| .defaultValue(HoodieTableConfig.DROP_PARTITION_COLUMNS.defaultValue().booleanValue()) | ||
| .withDocumentation(HoodieTableConfig.DROP_PARTITION_COLUMNS.doc()) | ||
| val DROP_PARTITION_COLUMNS: ConfigProperty[Boolean] = HoodieTableConfig.DROP_PARTITION_COLUMNS | ||
|
|
||
| /** @deprecated Use {@link HIVE_ASSUME_DATE_PARTITION} and its methods instead */ | ||
| @Deprecated | ||
|
|
@@ -774,4 +785,23 @@ object DataSourceOptionsHelper { | |
| override def apply (input: From): To = function (input) | ||
| } | ||
| } | ||
|
|
||
| implicit def convert[T, U](prop: ConfigProperty[T])(implicit converter: T => U): ConfigProperty[U] = { | ||
| checkState(prop.hasDefaultValue) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This could implicitly break when adding a new config with no default. I see this improves code quality, but we should avoid nice-to-have changes in a last-minute patch before release.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this breaks, it will break when the class is loaded, meaning that all the tests using the class would be broken, which is very easy to diagnose |
||
| var newProp: ConfigProperty[U] = ConfigProperty.key(prop.key()) | ||
| .defaultValue(converter(prop.defaultValue())) | ||
| .withDocumentation(prop.doc()) | ||
| .withAlternatives(prop.getAlternatives.asScala: _*) | ||
|
|
||
| newProp = toScalaOption(prop.getSinceVersion) match { | ||
| case Some(version) => newProp.sinceVersion(version) | ||
| case None => newProp | ||
| } | ||
| newProp = toScalaOption(prop.getDeprecatedVersion) match { | ||
| case Some(version) => newProp.deprecatedAfter(version) | ||
| case None => newProp | ||
| } | ||
|
|
||
| newProp | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
having "write" in the name makes it clear. If not, one could read it as "should drop partition columns when reading". So, I feel we can leave it as is.