[SPARK-22282][SQL] Rename OrcRelation to OrcFileFormat and remove ORC_COMPRESSION #19502
**OrcFileFormat.scala**

```diff
@@ -32,6 +32,7 @@ import org.apache.hadoop.io.{NullWritable, Writable}
 import org.apache.hadoop.mapred.{JobConf, OutputFormat => MapRedOutputFormat, RecordWriter, Reporter}
 import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
+import org.apache.orc.OrcConf.COMPRESS

 import org.apache.spark.TaskContext
 import org.apache.spark.sql.SparkSession
@@ -72,7 +73,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
     val configuration = job.getConfiguration

-    configuration.set(OrcRelation.ORC_COMPRESSION, orcOptions.compressionCodec)
+    configuration.set(COMPRESS.getAttribute, orcOptions.compressionCodec)
     configuration match {
       case conf: JobConf =>
         conf.setOutputFormat(classOf[OrcOutputFormat])
@@ -93,7 +94,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
   override def getFileExtension(context: TaskAttemptContext): String = {
     val compressionExtension: String = {
-      val name = context.getConfiguration.get(OrcRelation.ORC_COMPRESSION)
+      val name = context.getConfiguration.get(COMPRESS.getAttribute)
       OrcRelation.extensionsForCompressionCodecNames.getOrElse(name, "")
     }
@@ -256,9 +257,6 @@ private[orc] class OrcOutputWriter(
   }

 private[orc] object OrcRelation extends HiveInspectors {
-  // The references of Hive's classes will be minimized.
-  val ORC_COMPRESSION = "orc.compress"
-
```
Member: We have documented … Now we depend on the configuration name from an external library, but I think the configuration name should not be changed at all, so this looks fine.

Author: Yep, I agree. I don't think Apache ORC will change this in the future, since this is a primitive configuration.

Member: Oh, right, the …
```diff
   // This constant duplicates `OrcInputFormat.SARG_PUSHDOWN`, which is unfortunately not public.
   private[orc] val SARG_PUSHDOWN = "sarg.pushdown"
```
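The reviewers' point above is that `COMPRESS` is owned by the Apache ORC library rather than by a Spark constant. As a minimal illustration of what the new code relies on (the `CompressKeyCheck` object below is made up for this sketch and is not part of the PR):

```scala
import org.apache.orc.OrcConf.COMPRESS

// Illustrative sketch only: the ORC library's COMPRESS constant carries the same
// configuration key that Spark previously hard-coded as OrcRelation.ORC_COMPRESSION.
object CompressKeyCheck {
  def main(args: Array[String]): Unit = {
    // Expected to print "orc.compress"
    println(COMPRESS.getAttribute)
  }
}
```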
**OrcOptions.scala**

```diff
@@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.orc

 import java.util.Locale

+import org.apache.orc.OrcConf.COMPRESS
+
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.internal.SQLConf
@@ -42,7 +44,7 @@ private[orc] class OrcOptions(
   val compressionCodec: String = {
     // `compression`, `orc.compress`, and `spark.sql.orc.compression.codec` are
     // in order of precedence from highest to lowest.
-    val orcCompressionConf = parameters.get(OrcRelation.ORC_COMPRESSION)
+    val orcCompressionConf = parameters.get(COMPRESS.getAttribute)
     val codecName = parameters
       .get("compression")
       .orElse(orcCompressionConf)
```
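The comment in `OrcOptions` describes the precedence rule this diff touches. A hedged sketch of that resolution order as a standalone helper (not the actual `OrcOptions` implementation; the object and method names are made up):

```scala
import java.util.Locale

import org.apache.orc.OrcConf.COMPRESS

// Sketch of the documented precedence: the per-write `compression` option wins over
// `orc.compress` (COMPRESS.getAttribute), which wins over the session-level default
// passed in from spark.sql.orc.compression.codec.
object CodecPrecedenceSketch {
  def resolveCodec(parameters: Map[String, String], sessionDefault: String): String =
    parameters
      .get("compression")
      .orElse(parameters.get(COMPRESS.getAttribute))
      .getOrElse(sessionDefault)
      .toUpperCase(Locale.ROOT)
}
```

Under this sketch, `resolveCodec(Map("orc.compress" -> "zlib", "compression" -> "lzo"), "snappy")` returns `"LZO"`, which matches the assertions in the test diff further down.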
**OrcQuerySuite.scala**

```diff
@@ -22,6 +22,7 @@ import java.sql.Timestamp

 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.hive.ql.io.orc.{OrcStruct, SparkOrcNewRecordReader}
+import org.apache.orc.OrcConf.COMPRESS
 import org.scalatest.BeforeAndAfterAll

 import org.apache.spark.sql._
@@ -180,7 +181,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
     // Respect `orc.compress`.
     withTempPath { file =>
       spark.range(0, 10).write
-        .option("orc.compress", "ZLIB")
+        .option(COMPRESS.getAttribute, "ZLIB")
         .orc(file.getCanonicalPath)
       val expectedCompressionKind =
         OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
@@ -191,7 +192,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
     withTempPath { file =>
       spark.range(0, 10).write
         .option("compression", "ZLIB")
-        .option("orc.compress", "SNAPPY")
+        .option(COMPRESS.getAttribute, "SNAPPY")
         .orc(file.getCanonicalPath)
       val expectedCompressionKind =
         OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
```
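These tests exercise the write-side option end to end. A small usage sketch of the same option on a `DataFrameWriter` (the session setup and output path are assumptions, not from the PR; the ORC source still lives under `sql/hive` at this point, so Hive support is enabled):

```scala
import org.apache.orc.OrcConf.COMPRESS
import org.apache.spark.sql.SparkSession

// Usage sketch only: write a small range as ORC with ZLIB compression, passing the
// option under the key returned by COMPRESS.getAttribute ("orc.compress").
object OrcCompressionWriteExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("orc-compress-example")   // assumed app name
      .master("local[*]")                // assumed local run
      .enableHiveSupport()               // ORC source is Hive-backed at this point
      .getOrCreate()

    spark.range(0, 10).write
      .option(COMPRESS.getAttribute, "ZLIB")
      .mode("overwrite")
      .orc("/tmp/orc-zlib-example")      // assumed output path

    spark.stop()
  }
}
```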
**OrcSourceSuite.scala**

```diff
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.orc

 import java.io.File

+import org.apache.orc.OrcConf.COMPRESS
 import org.scalatest.BeforeAndAfterAll
```
Member: There is another test at spark/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala, line 153 (in d8f4540).

Author: That is a case-sensitivity test case. We should not change it.

Member: Oh, I was thinking something like …

Author: If you want, no problem at all. :)

Member: I'd just like to be consistent. Thanks. :)

Author: Yep!
```diff
 import org.apache.spark.sql.{QueryTest, Row}
@@ -205,8 +206,8 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA
       // `compression` -> `orc.compression` -> `spark.sql.orc.compression.codec`
       withSQLConf(SQLConf.ORC_COMPRESSION.key -> "uncompressed") {
         assert(new OrcOptions(Map.empty[String, String], conf).compressionCodec == "NONE")
-        val map1 = Map("orc.compress" -> "zlib")
-        val map2 = Map("orc.compress" -> "zlib", "compression" -> "lzo")
+        val map1 = Map(COMPRESS.getAttribute -> "zlib")
+        val map2 = Map(COMPRESS.getAttribute -> "zlib", "compression" -> "lzo")
         assert(new OrcOptions(map1, conf).compressionCodec == "ZLIB")
         assert(new OrcOptions(map2, conf).compressionCodec == "LZO")
       }
```
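The `withSQLConf` block above pins the session-wide default that sits at the bottom of the precedence chain. A hedged sketch of the same three layers from a user's point of view (the session setup, codec choices, and output path are illustrative assumptions, not from the PR):

```scala
import org.apache.orc.OrcConf.COMPRESS
import org.apache.spark.sql.SparkSession

// Sketch of the precedence the test asserts: the session default applies only when
// no per-write option is given; "compression" overrides "orc.compress".
object OrcCompressionPrecedenceExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")                 // assumed local run
      .enableHiveSupport()                // ORC source is Hive-backed at this point
      .getOrCreate()

    // Session-level default (lowest precedence).
    spark.conf.set("spark.sql.orc.compression.codec", "uncompressed")

    // Per-write options: "compression" (SNAPPY) beats "orc.compress" (ZLIB),
    // which in turn beats the session default (uncompressed).
    spark.range(0, 10).write
      .option(COMPRESS.getAttribute, "ZLIB")
      .option("compression", "SNAPPY")
      .mode("overwrite")
      .orc("/tmp/orc-precedence-example") // assumed output path

    spark.stop()
  }
}
```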
Member: @dongjoon-hyun, mind changing this to `OrcFileFormat` while we are here (see #14529)?

Author: Sure, no problem. Thank you for the review, @HyukjinKwon!