[SPARK-31926][SQL][TEST-HIVE1.2][test-maven] Fix concurrency issue for ThriftCLIService to getPortNumber #28797
New file (@@ -0,0 +1,65 @@): a log4j.properties test configuration for the hive-thriftserver module.

Review comment (author): This file is added because, when I ran the mvn tests for the hive-thriftserver module, it was reported missing.
```properties
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the file hive-thriftserver/target/unit-tests.log
log4j.rootLogger=DEBUG, CA, FA

# Console Appender
log4j.appender.CA=org.apache.log4j.ConsoleAppender
log4j.appender.CA.layout=org.apache.log4j.PatternLayout
log4j.appender.CA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %p %c: %m%n
log4j.appender.CA.Threshold = WARN

# File Appender
log4j.appender.FA=org.apache.log4j.FileAppender
log4j.appender.FA.append=false
log4j.appender.FA.file=target/unit-tests.log
log4j.appender.FA.layout=org.apache.log4j.PatternLayout
log4j.appender.FA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Set the logger level of File Appender to DEBUG
log4j.appender.FA.Threshold = DEBUG

# Some packages are noisy for no good reason.
log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false
log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF

log4j.additivity.org.apache.hadoop.hive.metastore.RetryingHMSHandler=false
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=OFF

log4j.additivity.hive.log=false
log4j.logger.hive.log=OFF

log4j.additivity.parquet.hadoop.ParquetRecordReader=false
log4j.logger.parquet.hadoop.ParquetRecordReader=OFF

log4j.additivity.org.apache.parquet.hadoop.ParquetRecordReader=false
log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=OFF

log4j.additivity.org.apache.parquet.hadoop.ParquetOutputCommitter=false
log4j.logger.org.apache.parquet.hadoop.ParquetOutputCommitter=OFF

log4j.additivity.hive.ql.metadata.Hive=false
log4j.logger.hive.ql.metadata.Hive=OFF

log4j.additivity.org.apache.hadoop.hive.ql.io.RCFile=false
log4j.logger.org.apache.hadoop.hive.ql.io.RCFile=ERROR

# Parquet related logging
log4j.logger.org.apache.parquet.CorruptStatistics=ERROR
log4j.logger.parquet.CorruptStatistics=ERROR
```
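For a sense of what these patterns produce: the file-appender pattern `%d{HH:mm:ss.SSS} %t %p %c{1}: %m%n` adds the thread name (`%t`) and keeps only the last component of the logger name (`%c{1}`), while the console pattern uses the full logger name and no thread. A made-up line in target/unit-tests.log (timestamp and thread are invented for illustration) would look roughly like:

```text
12:34:56.789 pool-1-thread-1 INFO SharedThriftServer: Trying to start HiveThriftServer2: mode=binary, attempt=1
```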
Changed file: the SharedThriftServer trait (hive-thriftserver test harness).

```diff
@@ -24,6 +24,7 @@ import scala.concurrent.duration._
 import scala.util.Try
 
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
+import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hive.service.cli.thrift.ThriftCLIService
 
 import org.apache.spark.sql.test.SharedSparkSession
```
```diff
@@ -33,6 +34,8 @@ trait SharedThriftServer extends SharedSparkSession {
   private var hiveServer2: HiveThriftServer2 = _
   private var serverPort: Int = 0
 
+  def mode: ServerMode.Value
+
   override def beforeAll(): Unit = {
     super.beforeAll()
     // Retries up to 3 times with different port numbers if the server fails to start
```
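The "retries up to 3 times" logic referenced in that comment sits outside this hunk. As a hedged illustration only (not the PR's actual code), a retry wrapper around `startThriftServer` could look like the following, assuming it runs inside the trait where `startThriftServer` is in scope:

```scala
import scala.util.{Failure, Success, Try}

// Illustrative sketch: call startThriftServer up to maxAttempts times,
// rethrowing the last failure if no attempt succeeds.
def startWithRetry(maxAttempts: Int = 3): Unit = {
  var lastError: Throwable = null
  var attempt = 1
  var started = false
  while (!started && attempt <= maxAttempts) {
    Try(startThriftServer(attempt)) match {
      case Success(_) => started = true
      case Failure(e) =>
        lastError = e
        attempt += 1
    }
  }
  if (!started) throw lastError
}
```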
```diff
@@ -50,14 +53,21 @@ trait SharedThriftServer extends SharedSparkSession {
       hiveServer2.stop()
     } finally {
       super.afterAll()
+      SessionState.detachSession()
     }
   }
 
+  protected def jdbcUri: String = if (mode == ServerMode.http) {
+    s"jdbc:hive2://localhost:$serverPort/default;transportMode=http;httpPath=cliservice"
+  } else {
+    s"jdbc:hive2://localhost:$serverPort/"
+  }
+
   protected def withJdbcStatement(fs: (Statement => Unit)*): Unit = {
     val user = System.getProperty("user.name")
     require(serverPort != 0, "Failed to bind an actual port for HiveThriftServer2")
     val connections =
-      fs.map { _ => DriverManager.getConnection(s"jdbc:hive2://localhost:$serverPort", user, "") }
+      fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") }
     val statements = connections.map(_.createStatement())
 
     try {
```

Review comment (Member), on `hiveServer2.stop()`: From some Jenkins jobs, an NPE is reported here.

Review comment (author), on `SessionState.detachSession()`: This fix is added to …
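For illustration, here is a hedged sketch of how a concrete suite might use these helpers. The suite name and query are hypothetical, and it assumes the usual ScalaTest `test(...)` helper is available through the trait's ancestors; `mode`, `jdbcUri`, and `withJdbcStatement` come from the trait above:

```scala
// Hypothetical example suite; not part of this PR.
class ExampleHttpThriftServerSuite extends SharedThriftServer {
  // Run this suite over the HTTP transport; a sibling suite could return ServerMode.binary.
  override def mode: ServerMode.Value = ServerMode.http

  test("simple query over JDBC") {
    // Each function receives a fresh java.sql.Statement on a connection to jdbcUri.
    withJdbcStatement { statement =>
      val rs = statement.executeQuery("SELECT 1")
      assert(rs.next())
      assert(rs.getInt(1) == 1)
    }
  }
}
```

Because `jdbcUri` folds the transport mode into the connection string, the same test body exercises either the binary or the HTTP endpoint depending on what `mode` returns.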
```diff
@@ -69,23 +79,35 @@ trait SharedThriftServer extends SharedSparkSession {
   }
 
   private def startThriftServer(attempt: Int): Unit = {
-    logInfo(s"Trying to start HiveThriftServer2:, attempt=$attempt")
+    logInfo(s"Trying to start HiveThriftServer2: mode=$mode, attempt=$attempt")
     val sqlContext = spark.newSession().sqlContext
-    // Set the HIVE_SERVER2_THRIFT_PORT to 0, so it could randomly pick any free port to use.
+    // Set the HIVE_SERVER2_THRIFT_PORT and HIVE_SERVER2_THRIFT_HTTP_PORT to 0, so it could
+    // randomly pick any free port to use.
     // It's much more robust than set a random port generated by ourselves ahead
     sqlContext.setConf(ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, "0")
-    hiveServer2 = HiveThriftServer2.startWithContext(sqlContext)
-    hiveServer2.getServices.asScala.foreach {
-      case t: ThriftCLIService if t.getPortNumber != 0 =>
-        serverPort = t.getPortNumber
-        logInfo(s"Started HiveThriftServer2: port=$serverPort, attempt=$attempt")
-      case _ =>
-    }
+    sqlContext.setConf(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT.varname, "0")
+    sqlContext.setConf(ConfVars.HIVE_SERVER2_TRANSPORT_MODE.varname, mode.toString)
 
-    // Wait for thrift server to be ready to serve the query, via executing simple query
-    // till the query succeeds. See SPARK-30345 for more details.
-    eventually(timeout(30.seconds), interval(1.seconds)) {
-      withJdbcStatement { _.execute("SELECT 1") }
+    try {
+      hiveServer2 = HiveThriftServer2.startWithContext(sqlContext)
+      hiveServer2.getServices.asScala.foreach {
+        case t: ThriftCLIService =>
+          serverPort = t.getPortNumber
+          logInfo(s"Started HiveThriftServer2: mode=$mode, port=$serverPort, attempt=$attempt")
+        case _ =>
+      }
+
+      // Wait for thrift server to be ready to serve the query, via executing simple query
+      // till the query succeeds. See SPARK-30345 for more details.
+      eventually(timeout(30.seconds), interval(1.seconds)) {
+        withJdbcStatement { _.execute("SELECT 1") }
+      }
+    } catch {
+      case e: Exception =>
+        logError("Error start hive server with Context ", e)
+        if (hiveServer2 != null) {
+          hiveServer2.stop()
+        }
     }
   }
 }
```

Review comment (Member), on `HiveThriftServer2.startWithContext(sqlContext)`: Before the NPE occurs, this failed.
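The heart of the fix is the bind-to-port-0 pattern: let the OS pick a free ephemeral port at bind time, then ask the bound socket which port it actually got, instead of generating a random port up front and hoping it is still free when the server binds. A minimal standalone sketch of that pattern with a plain `java.net.ServerSocket` (this is not HiveServer2 itself, just the underlying idea):

```scala
import java.net.ServerSocket

object PortZeroExample {
  def main(args: Array[String]): Unit = {
    // Binding to port 0 asks the OS for any free ephemeral port, so two
    // concurrently starting servers can never race for the same number.
    val socket = new ServerSocket(0)
    try {
      // Read back the port the OS actually assigned.
      println(s"bound to port ${socket.getLocalPort}")
    } finally {
      socket.close()
    }
  }
}
```

The PR applies the same idea to both transports by setting HIVE_SERVER2_THRIFT_PORT and HIVE_SERVER2_THRIFT_HTTP_PORT to "0", then reading the real port through `ThriftCLIService.getPortNumber` once the service has started.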
Review comment: There are multiple test suites from the thriftserver module here. Are the other suites OK to be executed in parallel?

Review comment: I verified it locally with all the `SharedThriftServer`-like tests in a single JVM, which required removing `org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite` here, and they passed. Command:

Review comment: Let's make sure we can pass tests when running within a single JVM. We can send a new PR to optimize it by running tests in parallel.

Review comment: @cloud-fan I meant the other hive-thriftserver suites in `testsWhichShouldRunInTheirOwnDedicatedJvm`, from line 479 to 482. I haven't looked into it.

Review comment: I checked the log again and found that the 3 `SharedThriftServer`-like tests in #28797 (comment) were executed sequentially in the single JVM. See https://www.scala-sbt.org/1.x/docs/Testing.html#Forking+tests; not quite sure which rule there worked.
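For reference, a hedged sketch of the sbt mechanism the linked docs describe: `testGrouping` assigns each suite a run policy, so matching suites can be forked into dedicated JVMs while the rest run inside the sbt JVM, and suites within one group execute sequentially. The name filter below is purely illustrative, not Spark's actual build configuration:

```scala
// build.sbt (illustrative sketch, not Spark's real settings)
Test / testGrouping := (Test / definedTests).value.map { suite =>
  val policy =
    if (suite.name.contains("ThriftServer"))
      Tests.SubProcess(ForkOptions()) // give this suite its own forked JVM
    else
      Tests.InProcess // run inside the sbt JVM
  Tests.Group(suite.name, Seq(suite), policy)
}
```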