From 389e181494d57c56a72fe6c179e511796b0ce686 Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Fri, 29 Aug 2014 21:45:50 -0700
Subject: [PATCH 1/8] Refined Thrift server test suite

---
 .../thriftserver/HiveThriftServer2Suite.scala | 39 ++++++++++---------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
index 38977ff16209..563c8be104ea 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
@@ -32,7 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.apache.hive.jdbc.HiveDriver
 import org.scalatest.FunSuite
 
-import org.apache.spark.Logging
+import org.apache.spark.{SparkException, Logging}
 import org.apache.spark.sql.catalyst.util.getTempFilePath
 
 /**
@@ -41,22 +41,20 @@ import org.apache.spark.sql.catalyst.util.getTempFilePath
 class HiveThriftServer2Suite extends FunSuite with Logging {
   Class.forName(classOf[HiveDriver].getCanonicalName)
 
-  private val listeningHost = "localhost"
-  private val listeningPort = {
-    // Let the system to choose a random available port to avoid collision with other parallel
-    // builds.
-    val socket = new ServerSocket(0)
-    val port = socket.getLocalPort
-    socket.close()
-    port
-  }
-
-  private val warehousePath = getTempFilePath("warehouse")
-  private val metastorePath = getTempFilePath("metastore")
-  private val metastoreJdbcUri = s"jdbc:derby:;databaseName=$metastorePath;create=true"
-
   def startThriftServerWithin(timeout: FiniteDuration = 30.seconds)(f: Statement => Unit) {
     val serverScript = "../../sbin/start-thriftserver.sh".split("/").mkString(File.separator)
+    val warehousePath = getTempFilePath("warehouse")
+    val metastorePath = getTempFilePath("metastore")
+    val metastoreJdbcUri = s"jdbc:derby:;databaseName=$metastorePath;create=true"
+    val listeningHost = "localhost"
+    val listeningPort = {
+      // Let the system choose a random available port to avoid collision with other parallel
+      // builds.
+      val socket = new ServerSocket(0)
+      val port = socket.getLocalPort
+      socket.close()
+      port
+    }
 
     val command =
       s"""$serverScript
@@ -68,13 +66,13 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
        |  --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_PORT}=$listeningPort
      """.stripMargin.split("\\s+").toSeq
 
-    val serverStarted = Promise[Unit]()
+    val serverRunning = Promise[Unit]()
     val buffer = new ArrayBuffer[String]()
 
     def captureOutput(source: String)(line: String) {
       buffer += s"$source> $line"
       if (line.contains("ThriftBinaryCLIService listening on")) {
-        serverStarted.success(())
+        serverRunning.success(())
       }
     }
 
@@ -83,14 +81,16 @@
 
     Future {
       val exitValue = process.exitValue()
-      logInfo(s"Spark SQL Thrift server process exit value: $exitValue")
+      // Stop waiting for server start in case the Thrift server exits prematurely.
+      serverRunning.tryFailure(
+        new SparkException(s"Spark SQL Thrift server process exit value: $exitValue"))
     }
 
     val jdbcUri = s"jdbc:hive2://$listeningHost:$listeningPort/"
     val user = System.getProperty("user.name")
 
     try {
-      Await.result(serverStarted.future, timeout)
+      Await.result(serverRunning.future, timeout)
       val connection = DriverManager.getConnection(jdbcUri, user, "")
       val statement = connection.createStatement()
 
@@ -122,6 +122,7 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
          |End HiveThriftServer2Suite failure output
          |=========================================
        """.stripMargin, cause)
+      throw cause
     } finally {
       warehousePath.delete()
       metastorePath.delete()
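
The refactoring above leans on two reusable tricks for testing a forked server process. Binding a ServerSocket to port 0 asks the OS for a free ephemeral port, and a readiness Promise is raced against a Future that blocks on the child's exit code, so Await.result either proceeds once the server logs its listening message, fails fast if the process dies, or times out. A minimal self-contained sketch of the same pattern, assuming a hypothetical "some-server" command and "listening on" readiness marker (neither is from the patch):

    import java.net.ServerSocket
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.duration._
    import scala.concurrent.{Await, Future, Promise}
    import scala.sys.process._

    object ForkedServerSketch extends App {
      // Bind to port 0 to let the OS pick a free port, then release it for the child.
      def freePort(): Int = {
        val socket = new ServerSocket(0)
        try socket.getLocalPort finally socket.close()
      }

      val ready = Promise[Unit]()

      // A ProcessLogger built from a single callback receives both stdout and stderr.
      val process = Process(Seq("some-server", "--port", freePort().toString)).run(
        ProcessLogger { line =>
          if (line.contains("listening on")) ready.trySuccess(())
        })

      // exitValue() blocks until the child dies; failing the promise here makes
      // the Await below stop immediately instead of burning the whole timeout.
      Future {
        val exitValue = process.exitValue()
        ready.tryFailure(new RuntimeException(s"server exited with code $exitValue"))
      }

      Await.result(ready.future, 30.seconds)
    }

Both sides go through trySuccess/tryFailure, so whichever event fires first wins and the later call is a harmless no-op.
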
From 83d88c763793a4ce8bd48596ec4a07343edff8e0 Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Tue, 2 Sep 2014 20:37:34 -0700
Subject: [PATCH 2/8] Similar fixes for CliSuite

---
 .../spark/sql/hive/thriftserver/CliSuite.scala     | 12 +++++++++---
 .../hive/thriftserver/HiveThriftServer2Suite.scala |  5 +++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 3475c2c9db08..50e5964e3759 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -30,10 +30,12 @@ import java.util.concurrent.atomic.AtomicInteger
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.scalatest.{BeforeAndAfterAll, FunSuite}
 
-import org.apache.spark.Logging
+import org.apache.spark.{SparkException, Logging}
 import org.apache.spark.sql.catalyst.util.getTempFilePath
 
 class CliSuite extends FunSuite with BeforeAndAfterAll with Logging {
+  val verbose = Option(System.getenv("SPARK_SQL_TEST_VERBOSE")).isDefined
+
   def runCliWithin(
       timeout: FiniteDuration,
       extraArgs: Seq[String] = Seq.empty)(
@@ -61,6 +63,9 @@ class CliSuite extends FunSuite with BeforeAndAfterAll with Logging {
     val buffer = new ArrayBuffer[String]()
 
     def captureOutput(source: String)(line: String) {
+      if (verbose) {
+        logInfo(s"$source> $line")
+      }
       buffer += s"$source> $line"
       if (line.contains(expectedAnswers(next.get()))) {
         if (next.incrementAndGet() == expectedAnswers.size) {
@@ -75,7 +80,8 @@
 
     Future {
       val exitValue = process.exitValue()
-      logInfo(s"Spark SQL CLI process exit value: $exitValue")
+      foundAllExpectedAnswers.tryFailure(
+        new SparkException(s"Spark SQL CLI process exit value: $exitValue"))
     }
 
     try {
@@ -118,7 +124,7 @@
         -> "Time taken: ",
       "SELECT COUNT(*) FROM hive_test;"
         -> "5",
-      "DROP TABLE hive_test"
+      "DROP TABLE hive_test;"
         -> "Time taken: "
     )
   }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
index 563c8be104ea..3644dadbb4f2 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
@@ -41,6 +41,8 @@ import org.apache.spark.sql.catalyst.util.getTempFilePath
 class HiveThriftServer2Suite extends FunSuite with Logging {
   Class.forName(classOf[HiveDriver].getCanonicalName)
 
+  val verbose = Option(System.getenv("SPARK_SQL_TEST_VERBOSE")).isDefined
+
   def startThriftServerWithin(timeout: FiniteDuration = 30.seconds)(f: Statement => Unit) {
     val serverScript = "../../sbin/start-thriftserver.sh".split("/").mkString(File.separator)
     val warehousePath = getTempFilePath("warehouse")
@@ -70,6 +72,9 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
     val buffer = new ArrayBuffer[String]()
 
     def captureOutput(source: String)(line: String) {
+      if (verbose) {
+        logInfo(s"$source> $line")
+      }
       buffer += s"$source> $line"
       if (line.contains("ThriftBinaryCLIService listening on")) {
         serverRunning.success(())
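
This second patch ports the same fail-fast promise to CliSuite, whose output watcher adds two pieces: an opt-in echo of captured lines controlled by the SPARK_SQL_TEST_VERBOSE environment variable, and an AtomicInteger cursor that walks an ordered list of expected answers. A condensed sketch of that matcher follows; the bounds check on the cursor is a safeguard added here (it is not in the patch), so output arriving after the final match cannot throw:

    import java.util.concurrent.atomic.AtomicInteger
    import scala.concurrent.Promise

    class OutputMatcher(expectedAnswers: Seq[String]) {
      private val next = new AtomicInteger(0)
      private val verbose = Option(System.getenv("SPARK_SQL_TEST_VERBOSE")).isDefined
      val foundAllExpectedAnswers = Promise[Unit]()

      // Feed every line of the child's stdout/stderr through here.
      def captureOutput(source: String)(line: String): Unit = {
        if (verbose) println(s"$source> $line") // opt-in echo for debugging CI runs
        // Only the next expected answer is checked, so answers must appear in order.
        // The bounds check (not in the patch) guards against lines after the last match.
        if (next.get() < expectedAnswers.size && line.contains(expectedAnswers(next.get()))) {
          if (next.incrementAndGet() == expectedAnswers.size) {
            foundAllExpectedAnswers.trySuccess(())
          }
        }
      }
    }

One instance serves both streams when wired through scala.sys.process, e.g. ProcessLogger(matcher.captureOutput("stdout"), matcher.captureOutput("stderr")).
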
From 0fd8f1af74f0cba50de6469d32a3476af7c9bf56 Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 3 Sep 2014 14:37:49 -0700
Subject: [PATCH 3/8] Debugging commit to diagnose Jenkins build

---
 dev/run-tests | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/dev/run-tests b/dev/run-tests
index 79401213a7fa..83f725eb4a6d 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -70,19 +70,19 @@ echo ""
 echo "========================================================================="
 echo "Running Apache RAT checks"
 echo "========================================================================="
-dev/check-license
+#dev/check-license
 
 echo ""
 echo "========================================================================="
 echo "Running Scala style checks"
 echo "========================================================================="
-dev/lint-scala
+#dev/lint-scala
 
 echo ""
 echo "========================================================================="
 echo "Running Python style checks"
 echo "========================================================================="
-dev/lint-python
+#dev/lint-python
 
 echo ""
 echo "========================================================================="
@@ -104,17 +104,17 @@ fi
 # echo "q" is needed because sbt on encountering a build file with failure
 # (either resolution or compilation) prompts the user for input either q, r,
 # etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS test | \
+echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS "hive-thriftserver/test-only *.*Suite" | \
    grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 
 echo ""
 echo "========================================================================="
 echo "Running PySpark tests"
 echo "========================================================================="
-./python/run-tests
+#./python/run-tests
 
 echo ""
 echo "========================================================================="
 echo "Detecting binary incompatibilites with MiMa"
 echo "========================================================================="
-dev/mima
+#dev/mima
From 201fdddfb27d54df52f50732b473e25e1d2c53dd Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 3 Sep 2014 16:56:09 -0700
Subject: [PATCH 4/8] Enables all other checks

---
 dev/run-tests | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/dev/run-tests b/dev/run-tests
index 83f725eb4a6d..a14b36f71618 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -70,19 +70,19 @@ echo ""
 echo "========================================================================="
 echo "Running Apache RAT checks"
 echo "========================================================================="
-#dev/check-license
+dev/check-license
 
 echo ""
 echo "========================================================================="
 echo "Running Scala style checks"
 echo "========================================================================="
-#dev/lint-scala
+dev/lint-scala
 
 echo ""
 echo "========================================================================="
 echo "Running Python style checks"
 echo "========================================================================="
-#dev/lint-python
+dev/lint-python
 
 echo ""
 echo "========================================================================="
@@ -111,10 +111,10 @@ echo ""
 echo "========================================================================="
 echo "Running PySpark tests"
 echo "========================================================================="
-#./python/run-tests
+./python/run-tests
 
 echo ""
 echo "========================================================================="
 echo "Detecting binary incompatibilites with MiMa"
 echo "========================================================================="
-#dev/mima
+dev/mima

From 2a934c0c7962f77e434335069cf58b509d04571f Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 3 Sep 2014 20:58:11 -0700
Subject: [PATCH 5/8] Revert dev/run-tests changes

---
 dev/run-tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/run-tests b/dev/run-tests
index a14b36f71618..bad940a25433 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -104,7 +104,7 @@ fi
 # echo "q" is needed because sbt on encountering a build file with failure
 # (either resolution or compilation) prompts the user for input either q, r,
 # etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS "hive-thriftserver/test-only *.*Suite" | \
+echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS hive/test test | \
    grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 
 echo ""
From 2c0546d6cf0b9787277718fcc042888207bd56da Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 3 Sep 2014 20:58:27 -0700
Subject: [PATCH 6/8] Added debugging output in bin/spark-class

---
 bin/spark-class | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/bin/spark-class b/bin/spark-class
index 5f5f9ea74888..8edd5edd6a5a 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -143,6 +143,10 @@ else
   CLASSPATH="$classpath_output"
 fi
 
+echo "===="
+echo After bin/compute-classpath.sh
+echo "===="
+
 if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   if test -z "$SPARK_TOOLS_JAR"; then
     echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SCALA_VERSION/" 1>&2
@@ -166,6 +170,10 @@ export CLASSPATH
 # to prepare the launch environment of this driver JVM.
 
 if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
+  echo "===="
+  echo Within SPARK_SUBMIT_BOOTSTRAP_DRIVER branch
+  echo "===="
+
   # This is used only if the properties file actually contains these special configs
   # Export the environment variables needed by SparkSubmitDriverBootstrapper
   export RUNNER
@@ -176,12 +184,16 @@ if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
   shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
   exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
 else
+  echo "===="
+  echo Within normal launching branch
+  echo "===="
+
   # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
-  if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
+  #if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
     echo -n "Spark Command: " 1>&2
     echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
     echo -e "========================================\n" 1>&2
-  fi
+  #fi
 
   exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
 fi
From d433fea43c3db6f7fbdaa3c936bcaabf518b895d Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 3 Sep 2014 22:13:25 -0700
Subject: [PATCH 7/8] Revert dev/run-tests changes

---
 dev/run-tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/run-tests b/dev/run-tests
index bad940a25433..79401213a7fa 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -104,7 +104,7 @@ fi
 # echo "q" is needed because sbt on encountering a build file with failure
 # (either resolution or compilation) prompts the user for input either q, r,
 # etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS hive/test test | \
+echo -e "q\n" | sbt/sbt $SBT_MAVEN_PROFILES_ARGS test | \
    grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 
 echo ""

From 23d96f198e2bbdcc6a50fb853f8a4f1943740b50 Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 10 Sep 2014 17:46:18 -0700
Subject: [PATCH 8/8] Revert bin/spark-class debugging changes

---
 bin/spark-class | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/bin/spark-class b/bin/spark-class
index 8edd5edd6a5a..5f5f9ea74888 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -143,10 +143,6 @@ else
   CLASSPATH="$classpath_output"
 fi
 
-echo "===="
-echo After bin/compute-classpath.sh
-echo "===="
-
 if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   if test -z "$SPARK_TOOLS_JAR"; then
     echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SCALA_VERSION/" 1>&2
@@ -170,10 +166,6 @@ export CLASSPATH
 # to prepare the launch environment of this driver JVM.
 
 if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
-  echo "===="
-  echo Within SPARK_SUBMIT_BOOTSTRAP_DRIVER branch
-  echo "===="
-
   # This is used only if the properties file actually contains these special configs
   # Export the environment variables needed by SparkSubmitDriverBootstrapper
   export RUNNER
@@ -184,16 +176,12 @@ if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
   shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
   exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
 else
-  echo "===="
-  echo Within normal launching branch
-  echo "===="
-
   # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
-  #if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
+  if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
     echo -n "Spark Command: " 1>&2
     echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
     echo -e "========================================\n" 1>&2
-  #fi
+  fi
 
   exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
 fi