[SPARK-9853][Core][Follow-up] Regularize all the shuffle configurations related to adaptive execution #26147
Changes from all commits
```diff
@@ -349,53 +349,43 @@ object SQLConf {
     .checkValue(_ > 0, "The value of spark.sql.shuffle.partitions must be positive")
     .createWithDefault(200)
 
+  val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled")
+    .doc("When true, enable adaptive query execution.")
+    .booleanConf
+    .createWithDefault(false)
+
   val SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE =
     buildConf("spark.sql.adaptive.shuffle.targetPostShuffleInputSize")
       .doc("The target post-shuffle input size in bytes of a task.")
       .bytesConf(ByteUnit.BYTE)
       .createWithDefault(64 * 1024 * 1024)
 
   val FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED =
-    buildConf("spark.sql.adaptive.fetchShuffleBlocksInBatch.enabled")
+    buildConf("spark.sql.adaptive.shuffle.fetchShuffleBlocksInBatch.enabled")
       .doc("Whether to fetch the continuous shuffle blocks in batch. Instead of fetching blocks " +
         "one by one, fetching continuous shuffle blocks for the same map task in batch can " +
         "reduce IO and improve performance. Note, this feature also depends on a relocatable " +
         "serializer and the concatenation support codec in use.")
       .booleanConf
       .createWithDefault(true)
 
-  val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled")
-    .doc("When true, enable adaptive query execution.")
-    .booleanConf
-    .createWithDefault(false)
-
-  val NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN =
-    buildConf("spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin")
-      .doc("The relation with a non-empty partition ratio lower than this config will not be " +
-        "considered as the build side of a broadcast-hash join in adaptive execution regardless " +
-        "of its size.")
-      .doubleConf
-      .checkValue(_ >= 0, "The non-empty partition ratio must be positive number.")
-      .createWithDefault(0.2)
-
   val REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED =
-    buildConf("spark.sql.adaptive.reducePostShufflePartitions.enabled")
+    buildConf("spark.sql.adaptive.shuffle.reducePostShufflePartitions.enabled")
       .doc("When true and adaptive execution is enabled, this enables reducing the number of " +
         "post-shuffle partitions based on map output statistics.")
       .booleanConf
       .createWithDefault(true)
 
   val SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS =
-    buildConf("spark.sql.adaptive.minNumPostShufflePartitions")
+    buildConf("spark.sql.adaptive.shuffle.minNumPostShufflePartitions")
       .doc("The advisory minimum number of post-shuffle partitions used in adaptive execution.")
       .intConf
       .checkValue(_ > 0, "The minimum shuffle partition number " +
        "must be a positive integer.")
       .createWithDefault(1)
 
   val SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS =
-    buildConf("spark.sql.adaptive.maxNumPostShufflePartitions")
+    buildConf("spark.sql.adaptive.shuffle.maxNumPostShufflePartitions")
      .doc("The advisory maximum number of post-shuffle partitions used in adaptive execution. " +
        "This is used as the initial number of pre-shuffle partitions. By default it equals to " +
        "spark.sql.shuffle.partitions")
```

Review thread on the `spark.sql.adaptive.shuffle.fetchShuffleBlocksInBatch.enabled` rename (marked as resolved by xuanyuanking):

Member: It seems we need to improve the documentation, because this feature doesn't seem to support the old shuffle service.

Contributor: We should improve the error message as well. cc @xuanyuanking

Author: Thanks for reporting this, fixed in #26663.
```diff
@@ -405,13 +395,22 @@ object SQLConf {
       .createOptional
 
   val OPTIMIZE_LOCAL_SHUFFLE_READER_ENABLED =
-    buildConf("spark.sql.adaptive.optimizedLocalShuffleReader.enabled")
+    buildConf("spark.sql.adaptive.shuffle.optimizedLocalShuffleReader.enabled")
       .doc("When true and adaptive execution is enabled, this enables the optimization of" +
         " converting the shuffle reader to local shuffle reader for the shuffle exchange" +
         " of the broadcast hash join in probe side.")
       .booleanConf
       .createWithDefault(true)
 
+  val NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN =
+    buildConf("spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin")
+      .doc("The relation with a non-empty partition ratio lower than this config will not be " +
+        "considered as the build side of a broadcast-hash join in adaptive execution regardless " +
+        "of its size.")
+      .doubleConf
+      .checkValue(_ >= 0, "The non-empty partition ratio must be positive number.")
+      .createWithDefault(0.2)
+
   val SUBEXPRESSION_ELIMINATION_ENABLED =
     buildConf("spark.sql.subexpressionElimination.enabled")
       .internal()
```
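The doc string for `spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin` boils down to a threshold test on the fraction of non-empty shuffle partitions. The standalone sketch below illustrates that semantics with the default of 0.2; the helper name and inputs are assumptions for illustration, not Spark's actual planner code.

```scala
// Illustrative sketch of the threshold described in the doc string above;
// names like mapOutputPartitionSizes are assumptions, not Spark internals.
object NonEmptyPartitionRatioExample {
  // With the default ratio of 0.2, a relation is ruled out as the
  // broadcast-hash-join build side if fewer than 20% of its partitions are non-empty.
  def qualifiesAsBuildSide(
      mapOutputPartitionSizes: Seq[Long],
      nonEmptyPartitionRatioForBroadcastJoin: Double = 0.2): Boolean = {
    val total = mapOutputPartitionSizes.size
    val nonEmpty = mapOutputPartitionSizes.count(_ > 0)
    total > 0 && nonEmpty.toDouble / total >= nonEmptyPartitionRatioForBroadcastJoin
  }

  def main(args: Array[String]): Unit = {
    // 1 non-empty partition out of 10 => ratio 0.1 < 0.2, so excluded regardless of size.
    println(qualifiesAsBuildSide(Seq(128L) ++ Seq.fill(9)(0L))) // false
    // 3 non-empty partitions out of 10 => ratio 0.3 >= 0.2, so not excluded by this rule.
    println(qualifiesAsBuildSide(Seq(128L, 64L, 32L) ++ Seq.fill(7)(0L))) // true
  }
}
```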
```diff
@@ -2148,21 +2147,18 @@ class SQLConf extends Serializable with Logging {
   def numShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS)
 
-  def targetPostShuffleInputSize: Long =
-    getConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE)
+  def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED)
 
-  def fetchShuffleBlocksInBatchEnabled: Boolean =
-    getConf(FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED)
+  def targetPostShuffleInputSize: Long = getConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE)
 
-  def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED)
+  def fetchShuffleBlocksInBatchEnabled: Boolean = getConf(FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED)
 
   def nonEmptyPartitionRatioForBroadcastJoin: Double =
     getConf(NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN)
 
   def reducePostShufflePartitionsEnabled: Boolean = getConf(REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED)
 
-  def minNumPostShufflePartitions: Int =
-    getConf(SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS)
+  def minNumPostShufflePartitions: Int = getConf(SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS)
 
   def maxNumPostShufflePartitions: Int =
     getConf(SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS).getOrElse(numShufflePartitions)
```
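One detail worth noting in the accessor hunk: `SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS` is an optional conf (note the `.createOptional` at the top of the second hunk), so `maxNumPostShufflePartitions` falls back to `numShufflePartitions` when it is unset. Below is a tiny self-contained sketch of that defaulting, using plain `Option` values in place of real conf lookups.

```scala
// Standalone illustration of the getOrElse defaulting used by
// maxNumPostShufflePartitions above; these vals stand in for conf lookups.
object MaxPostShufflePartitionsDefaulting {
  def main(args: Array[String]): Unit = {
    val numShufflePartitions = 200                 // spark.sql.shuffle.partitions default
    val maxNumPostShuffleConf: Option[Int] = None  // maxNumPostShufflePartitions left unset

    // Mirrors: getConf(SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS).getOrElse(numShufflePartitions)
    val maxNumPostShufflePartitions = maxNumPostShuffleConf.getOrElse(numShufflePartitions)
    println(maxNumPostShufflePartitions) // 200: falls back to spark.sql.shuffle.partitions
  }
}
```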
Review comment:

Reviewer: We should update/add the corresponding descriptions for all the SQLConf entries that are affected by this conf. Otherwise, end users might not know the relation between these confs.

Author: Thanks, done in #26664.
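For illustration only, the kind of cross-reference the reviewer is asking for could look like the hypothetical sketch below, written in the same `buildConf` DSL as the diff; the actual wording added in #26664 may differ.

```scala
// Hypothetical example of documenting the dependency on the parent switch;
// this is NOT the actual text added in #26664.
val REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED =
  buildConf("spark.sql.adaptive.shuffle.reducePostShufflePartitions.enabled")
    .doc("When true and 'spark.sql.adaptive.enabled' is true, this enables reducing the " +
      "number of post-shuffle partitions based on map output statistics. This config has " +
      "no effect unless 'spark.sql.adaptive.enabled' is enabled.")
    .booleanConf
    .createWithDefault(true)
```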