Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,6 @@ case class BaseFileOnlyRelation(override val sqlContext: SQLContext,
// Returns a copy of this relation carrying the pruned data schema, so later
// reads only materialize the columns the query actually needs.
override def updatePrunedDataSchema(prunedSchema: StructType): Relation =
this.copy(prunedDataSchema = Some(prunedSchema))

override def imbueConfigs(sqlContext: SQLContext): Unit = {
super.imbueConfigs(sqlContext)
// TODO Issue with setting this to true in spark 332
if (HoodieSparkUtils.gteqSpark3_4 || !HoodieSparkUtils.gteqSpark3_3_2) {
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not use `contains` here; Spark always injects this config, so the key is guaranteed to be present.

The reason this config was forcibly set to true here is that the parent class of BaseFileOnlyRelation previously force-set spark.sql.parquet.enableVectorizedReader=false. Users were unable to turn on vectorization, resulting in a significant decrease in query performance, so at the time it was forcibly set back to true at this location.

However, https://issues.apache.org/jira/browse/HUDI-3639 removed that behavior from the parent class of BaseFileOnlyRelation.
Therefore, we no longer need to force spark.sql.parquet.enableVectorizedReader=true in BaseFileOnlyRelation; whether to enable vectorization is left to the user to decide.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just remove line 83. to line 85

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the reminder. Updated.

}
}

protected override def composeRDD(fileSplits: Seq[HoodieBaseFileSplit],
tableSchema: HoodieTableSchema,
requiredSchema: HoodieTableSchema,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,6 @@ case class HoodieBootstrapMORRelation(override val sqlContext: SQLContext,

// Merge-on-read bootstrap relation always needs the merging-related meta fields;
// delegate to the shared list defined elsewhere in the hierarchy (mandatoryFieldsForMerging).
override lazy val mandatoryFields: Seq[String] = mandatoryFieldsForMerging

// Applies session-level SQL config overrides for this relation.
// After the parent's overrides, force-enables Parquet vectorized reading.
// NOTE(review): this unconditionally overrides the user's setting of
// spark.sql.parquet.enableVectorizedReader — per the PR discussion this
// override is being removed so the user's choice is respected.
override def imbueConfigs(sqlContext: SQLContext): Unit = {
super.imbueConfigs(sqlContext)
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "true")
}


protected override def getFileSlices(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[FileSlice] = {
if (globPaths.isEmpty) {
fileIndex.listFileSlices(HoodieFileIndex.
Expand Down