Adds fixes from review:

willb · willb · commit ec35c8016c63 · 2014-07-14T17:56:15.000-05:00
* orders imports in stringOperations.scala * Substring.dataType throws exception if children are unresolved * inlines Substring.slice (~11.5% performance improvement on microbenchmark runs) * adds a special `toString` case for two-argument SUBSTR expressions * removes spurious I_ prefix to SUBSTR(ING) in HiveQL.scala Thanks to @concretevitamin for prompt and useful feedback!
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -21,10 +21,9 @@ import java.util.regex.Pattern
 
 import scala.collection.IndexedSeqOptimized
 
-import org.apache.spark.sql.catalyst.types.DataType
-import org.apache.spark.sql.catalyst.types.StringType
-import org.apache.spark.sql.catalyst.types.BinaryType
-import org.apache.spark.sql.catalyst.types.BooleanType
+
+import org.apache.spark.sql.catalyst.analysis.UnresolvedException
+import org.apache.spark.sql.catalyst.types.{BinaryType, BooleanType, DataType, StringType}
 
 trait StringRegexExpression {
   self: BinaryExpression =>
@@ -219,13 +218,17 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends
   
   def nullable: Boolean = true
   def dataType: DataType = {
+    if (!resolved) {
+      throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved")
+    }
     if (str.dataType == BinaryType) str.dataType else StringType
   }
   
   def references = children.flatMap(_.references).toSet
   
   override def children = str :: pos :: len :: Nil
   
+  @inline
   def slice[T, C <% IndexedSeqOptimized[T,_]](str: C, startPos: Int, sliceLen: Int): Any = {
     val len = str.length
     // Hive and SQL use one-based indexing for SUBSTR arguments but also accept zero and
@@ -267,5 +270,8 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends
     }
   }
   
-  override def toString = s"SUBSTR($str, $pos, $len)"
+  override def toString = len match {
+    case max if max == Integer.MAX_VALUE => s"SUBSTR($str, $pos)"
+    case _ => s"SUBSTR($str, $pos, $len)"
+  }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -860,7 +860,7 @@ private[hive] object HiveQl {
   val BETWEEN = "(?i)BETWEEN".r
   val WHEN = "(?i)WHEN".r
   val CASE = "(?i)CASE".r
-  val SUBSTR = "(?i)I_SUBSTR(?:ING)?".r
+  val SUBSTR = "(?i)SUBSTR(?:ING)?".r
 
   protected def nodeToExpr(node: Node): Expression = node match {
     /* Attribute References */