-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-21533][SQL] Print warning messages when override function configure found in Hive UDFs
#18768
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-21533][SQL] Print warning messages when override function configure found in Hive UDFs
#18768
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,15 +22,16 @@ import java.rmi.server.UID | |
|
|
||
| import scala.collection.JavaConverters._ | ||
| import scala.language.implicitConversions | ||
| import scala.reflect.ClassTag | ||
| import scala.reflect.{classTag, ClassTag} | ||
| import scala.util.control.NonFatal | ||
|
|
||
| import com.google.common.base.Objects | ||
| import org.apache.avro.Schema | ||
| import org.apache.hadoop.conf.Configuration | ||
| import org.apache.hadoop.fs.Path | ||
| import org.apache.hadoop.hive.ql.exec.{UDF, Utilities} | ||
| import org.apache.hadoop.hive.ql.exec.{MapredContext, UDF, Utilities} | ||
| import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc} | ||
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro | ||
| import org.apache.hadoop.hive.ql.udf.generic.{GenericUDF, GenericUDFMacro, GenericUDTF} | ||
| import org.apache.hadoop.hive.serde2.ColumnProjectionUtils | ||
| import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils} | ||
| import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector | ||
|
|
@@ -42,7 +43,7 @@ import org.apache.spark.internal.Logging | |
| import org.apache.spark.sql.types.Decimal | ||
| import org.apache.spark.util.Utils | ||
|
|
||
| private[hive] object HiveShim { | ||
| private[hive] object HiveShim extends Logging { | ||
| // Precision and scale to pass for unlimited decimals; these are the same as the precision and | ||
| // scale Hive 0.13 infers for BigDecimals from sources that don't specify them (e.g. UDFs) | ||
| val UNLIMITED_DECIMAL_PRECISION = 38 | ||
|
|
@@ -111,6 +112,32 @@ private[hive] object HiveShim { | |
| } | ||
| } | ||
|
|
||
| private def hasInheritanceOf[UDFType: ClassTag](func: String, clazz: Class[_]): Boolean = { | ||
| val parentClazz = classTag[UDFType].runtimeClass | ||
| if (parentClazz.isAssignableFrom(clazz)) { | ||
| try { | ||
| val funcClass = clazz.getMethod(func, classOf[MapredContext]) | ||
| // If a given `func` not overridden, `Method.getDeclaringClass` returns | ||
| // a parent Class object. | ||
| funcClass.getDeclaringClass != parentClazz | ||
| } catch { | ||
| case NonFatal(_) => false | ||
| } | ||
| } else { | ||
| false | ||
| } | ||
| } | ||
|
|
||
| def validateHiveUserDefinedFunction(udfClass: Class[_]): Unit = { | ||
| if (hasInheritanceOf[GenericUDF]("configure", udfClass) || | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When https://hive.apache.org/javadocs/r0.10.0/api/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.html
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I checked it seemed this API had been implemented in v0.11.0 (https://github.com/apache/hive/blob/release-0.11.0/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java). This is the commit for this API.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If ran with a Hive version that
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIUC spark always refers to
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, that's right. As you said, it is still good to catch the exception. |
||
| hasInheritanceOf[GenericUDTF]("configure", udfClass)) { | ||
| logWarning(s"Found an overridden method `configure` in ${udfClass.getSimpleName}, but " + | ||
| "Spark does not call the method during initialization because Spark does not use " + | ||
| "MapredContext inside (See SPARK-21533). So, you might reconsider the implementation of " + | ||
| s"${udfClass.getSimpleName}.") | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * This class provides the UDF creation and also the UDF instance serialization and | ||
| * de-serialization cross process boundary. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am neural to introduce a warning message for this case. Not sure how helpful the warning message will be.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok. So, I think it's ok to revisit this again if we have more reports from users.