Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package org.apache.spark.sql.execution.datasources.orc

import java.util.Locale

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
Expand All @@ -29,7 +27,7 @@ import org.apache.spark.SparkException
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution
import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types._

Expand Down Expand Up @@ -117,29 +115,16 @@ object OrcUtils extends Logging {
}
})
} else {
if (isCaseSensitive) {
Some(requiredSchema.fieldNames.map { name =>
orcFieldNames.indexWhere(caseSensitiveResolution(_, name))
})
} else {
// Do case-insensitive resolution only if in case-insensitive mode
val caseInsensitiveOrcFieldMap =
orcFieldNames.zipWithIndex.groupBy(_._1.toLowerCase(Locale.ROOT))
Some(requiredSchema.fieldNames.map { requiredFieldName =>
caseInsensitiveOrcFieldMap
.get(requiredFieldName.toLowerCase(Locale.ROOT))
.map { matchedOrcFields =>
if (matchedOrcFields.size > 1) {
// Need to fail if there is ambiguity, i.e. more than one field is matched.
val matchedOrcFieldsString = matchedOrcFields.map(_._1).mkString("[", ", ", "]")
throw new RuntimeException(s"""Found duplicate field(s) "$requiredFieldName": """
+ s"$matchedOrcFieldsString in case-insensitive mode")
} else {
matchedOrcFields.head._2
}
}.getOrElse(-1)
})
val resolver = if (isCaseSensitive) caseSensitiveResolution else caseInsensitiveResolution
// Need to fail if there is ambiguity, i.e. more than one field is matched.
requiredSchema.fieldNames.foreach { requiredFieldName =>
val matchedOrcFields = orcFieldNames.filter(resolver(_, requiredFieldName))
if (matchedOrcFields.size > 1) {
throw new RuntimeException(s"""Found duplicate field(s) "$requiredFieldName": """ +
s"${matchedOrcFields.mkString("[", ", ", "]")} in case-insensitive mode")
Copy link
Copy Markdown
Author

@dongjoon-hyun dongjoon-hyun Sep 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, we run this duplicate-field check in both modes, but `orcFieldNames` never contains names that are duplicated case-sensitively, so the check can only fail in case-insensitive mode.

}
}
Some(requiredSchema.fieldNames.map { name => orcFieldNames.indexWhere(resolver(_, name)) })
}
}
}
Expand Down