Commit 10e526e
[SPARK-20213][SQL] Fix DataFrameWriter operations in SQL UI tab
## What changes were proposed in this pull request?

Currently the `DataFrameWriter` operations have several problems:

1. A non-file-format data source write action doesn't show up in the SQL tab of the Spark UI.
2. A file-format data source write action shows a scan node in the SQL tab, without saying anything about the write. (Streaming has the same issue, but it is not fixed in this PR.)
3. Spark SQL CLI actions don't show up in the SQL tab.

This PR fixes all of them by refactoring `ExecuteCommandExec` to make it have children.

close #17540

## How was this patch tested?

Existing tests. The UI was also tested manually. For a simple command: `Seq(1 -> "a").toDF("i", "j").write.parquet("/tmp/qwe")`

Before this PR:

<img width="266" alt="qq20170523-035840 2x" src="https://cloud.githubusercontent.com/assets/3182036/26326050/24e18ba2-3f6c-11e7-8817-6dd275bf6ac5.png">

After this PR:

<img width="287" alt="qq20170523-035708 2x" src="https://cloud.githubusercontent.com/assets/3182036/26326054/2ad7f460-3f6c-11e7-8053-d68325beb28f.png">

Author: Wenchen Fan <[email protected]>

Closes #18064 from cloud-fan/execution.
1 parent fa757ee commit 10e526e
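
To repeat the manual UI check from the description, the following minimal driver can be used. It is a sketch, not part of this commit: the object name, the `local[2]` master, the sleep, and the added `overwrite` mode are assumptions made so the snippet is self-contained and rerunnable; only the write command itself is quoted from the PR description.

```scala
import org.apache.spark.sql.SparkSession

object SqlUiWriteRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("sql-ui-write-repro")
      .getOrCreate()
    import spark.implicits._

    // The command from the PR description; with this patch it should appear in
    // the SQL tab as a write command with the producing query as its child,
    // rather than as a bare scan node.
    Seq(1 -> "a").toDF("i", "j").write.mode("overwrite").parquet("/tmp/qwe")

    // Keep the driver alive so http://localhost:4040/SQL/ can be inspected.
    Thread.sleep(60000)
    spark.stop()
  }
}
```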

37 files changed: +299, -218 lines


external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala

Lines changed: 4 additions & 6 deletions
@@ -85,12 +85,10 @@ private[kafka010] object KafkaWriter extends Logging {
       topic: Option[String] = None): Unit = {
     val schema = queryExecution.analyzed.output
     validateQuery(queryExecution, kafkaParameters, topic)
-    SQLExecution.withNewExecutionId(sparkSession, queryExecution) {
-      queryExecution.toRdd.foreachPartition { iter =>
-        val writeTask = new KafkaWriteTask(kafkaParameters, schema, topic)
-        Utils.tryWithSafeFinally(block = writeTask.execute(iter))(
-          finallyBlock = writeTask.close())
-      }
+    queryExecution.toRdd.foreachPartition { iter =>
+      val writeTask = new KafkaWriteTask(kafkaParameters, schema, topic)
+      Utils.tryWithSafeFinally(block = writeTask.execute(iter))(
+        finallyBlock = writeTask.close())
     }
   }
 }

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala

Lines changed: 1 addition & 1 deletion
@@ -357,7 +357,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
     })
   }
 
-  override protected def innerChildren: Seq[QueryPlan[_]] = subqueries
+  override def innerChildren: Seq[QueryPlan[_]] = subqueries
 
   /**
    * Returns a plan where a best effort attempt has been made to transform `this` in a way

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Command.scala

Lines changed: 2 additions & 1 deletion
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute
  * commands can be used by parsers to represent DDL operations. Commands, unlike queries, are
  * eagerly executed.
  */
-trait Command extends LeafNode {
+trait Command extends LogicalPlan {
   override def output: Seq[Attribute] = Seq.empty
+  override def children: Seq[LogicalPlan] = Seq.empty
 }
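
The point of widening `Command` from `LeafNode` to `LogicalPlan` is that a concrete command can now expose the query it wraps as a child. A hypothetical command illustrating the shape (not taken from this commit; the class and its parameters are invented for illustration):

```scala
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}

// Hypothetical: a command that writes out the result of `query` can report
// that query as a child, so the planner plans it and the SQL UI displays it
// beneath the command node instead of hiding it.
case class ExampleWriteCommand(query: LogicalPlan, path: String) extends Command {
  override def children: Seq[LogicalPlan] = Seq(query)
}
```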

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala

Lines changed: 7 additions & 8 deletions
@@ -22,7 +22,8 @@ import java.math.{MathContext, RoundingMode}
 import scala.util.control.NonFatal
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.{AnalysisException, Row}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -243,9 +244,9 @@ object ColumnStat extends Logging {
     }
 
     col.dataType match {
-      case _: IntegralType => fixedLenTypeStruct(LongType)
+      case dt: IntegralType => fixedLenTypeStruct(dt)
       case _: DecimalType => fixedLenTypeStruct(col.dataType)
-      case DoubleType | FloatType => fixedLenTypeStruct(DoubleType)
+      case dt @ (DoubleType | FloatType) => fixedLenTypeStruct(dt)
       case BooleanType => fixedLenTypeStruct(col.dataType)
       case DateType => fixedLenTypeStruct(col.dataType)
       case TimestampType => fixedLenTypeStruct(col.dataType)
@@ -264,14 +265,12 @@ object ColumnStat extends Logging {
   }
 
   /** Convert a struct for column stats (defined in statExprs) into [[ColumnStat]]. */
-  def rowToColumnStat(row: Row, attr: Attribute): ColumnStat = {
+  def rowToColumnStat(row: InternalRow, attr: Attribute): ColumnStat = {
     ColumnStat(
       distinctCount = BigInt(row.getLong(0)),
       // for string/binary min/max, get should return null
-      min = Option(row.get(1))
-        .map(v => fromExternalString(v.toString, attr.name, attr.dataType)).flatMap(Option.apply),
-      max = Option(row.get(2))
-        .map(v => fromExternalString(v.toString, attr.name, attr.dataType)).flatMap(Option.apply),
+      min = Option(row.get(1, attr.dataType)),
+      max = Option(row.get(2, attr.dataType)),
       nullCount = BigInt(row.getLong(3)),
       avgLen = row.getLong(4),
       maxLen = row.getLong(5)

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 6 additions & 6 deletions
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogRelation, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
+import org.apache.spark.sql.execution.SQLExecution
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, LogicalRelation, SaveIntoDataSourceCommand}
 import org.apache.spark.sql.sources.BaseRelation
@@ -231,12 +232,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     assertNotBucketed("save")
 
     runCommand(df.sparkSession, "save") {
-      SaveIntoDataSourceCommand(
-        query = df.logicalPlan,
-        provider = source,
+      DataSource(
+        sparkSession = df.sparkSession,
+        className = source,
         partitionColumns = partitioningColumns.getOrElse(Nil),
-        options = extraOptions.toMap,
-        mode = mode)
+        options = extraOptions.toMap).planForWriting(mode, df.logicalPlan)
     }
   }
 
@@ -607,7 +607,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     try {
       val start = System.nanoTime()
       // call `QueryExecution.toRDD` to trigger the execution of commands.
-      qe.toRdd
+      SQLExecution.withNewExecutionId(session, qe)(qe.toRdd)
       val end = System.nanoTime()
       session.listenerManager.onSuccess(name, qe, end - start)
     } catch {
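
Reading the last hunk in context, the surrounding `runCommand` helper of `DataFrameWriter` presumably looks roughly like the sketch below (reconstructed around the lines shown above; the failure branch is an assumption). Every writer action funnels its command through this helper, so the whole write, including the jobs launched by the command, now runs under a single execution ID and shows up as one entry in the SQL tab.

```scala
// Sketch only: reconstructed around the hunk above, not copied from the file.
private def runCommand(session: SparkSession, name: String)(command: LogicalPlan): Unit = {
  val qe = session.sessionState.executePlan(command)
  try {
    val start = System.nanoTime()
    // call `QueryExecution.toRDD` to trigger the execution of commands.
    SQLExecution.withNewExecutionId(session, qe)(qe.toRdd)
    val end = System.nanoTime()
    session.listenerManager.onSuccess(name, qe, end - start)
  } catch {
    case e: Exception =>
      // assumed failure path: report to listeners and rethrow
      session.listenerManager.onFailure(name, qe, e)
      throw e
  }
}
```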

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 40 additions & 8 deletions
@@ -179,9 +179,9 @@ class Dataset[T] private[sql](
     // to happen right away to let these side effects take place eagerly.
     queryExecution.analyzed match {
       case c: Command =>
-        LocalRelation(c.output, queryExecution.executedPlan.executeCollect())
+        LocalRelation(c.output, withAction("command", queryExecution)(_.executeCollect()))
       case u @ Union(children) if children.forall(_.isInstanceOf[Command]) =>
-        LocalRelation(u.output, queryExecution.executedPlan.executeCollect())
+        LocalRelation(u.output, withAction("command", queryExecution)(_.executeCollect()))
       case _ =>
         queryExecution.analyzed
     }
@@ -248,8 +248,13 @@ class Dataset[T] private[sql](
       _numRows: Int, truncate: Int = 20, vertical: Boolean = false): String = {
     val numRows = _numRows.max(0)
     val takeResult = toDF().take(numRows + 1)
-    val hasMoreData = takeResult.length > numRows
-    val data = takeResult.take(numRows)
+    showString(takeResult, numRows, truncate, vertical)
+  }
+
+  private def showString(
+      dataWithOneMoreRow: Array[Row], numRows: Int, truncate: Int, vertical: Boolean): String = {
+    val hasMoreData = dataWithOneMoreRow.length > numRows
+    val data = dataWithOneMoreRow.take(numRows)
 
     lazy val timeZone =
       DateTimeUtils.getTimeZone(sparkSession.sessionState.conf.sessionLocalTimeZone)
@@ -684,6 +689,18 @@ class Dataset[T] private[sql](
     } else {
       println(showString(numRows, truncate = 0))
     }
+
+  // An internal version of `show`, which won't set execution id and trigger listeners.
+  private[sql] def showInternal(_numRows: Int, truncate: Boolean): Unit = {
+    val numRows = _numRows.max(0)
+    val takeResult = toDF().takeInternal(numRows + 1)
+
+    if (truncate) {
+      println(showString(takeResult, numRows, truncate = 20, vertical = false))
+    } else {
+      println(showString(takeResult, numRows, truncate = 0, vertical = false))
+    }
+  }
   // scalastyle:on println
 
   /**
@@ -2453,6 +2470,11 @@ class Dataset[T] private[sql](
    */
   def take(n: Int): Array[T] = head(n)
 
+  // An internal version of `take`, which won't set execution id and trigger listeners.
+  private[sql] def takeInternal(n: Int): Array[T] = {
+    collectFromPlan(limit(n).queryExecution.executedPlan)
+  }
+
   /**
    * Returns the first `n` rows in the Dataset as a list.
    *
@@ -2477,6 +2499,11 @@ class Dataset[T] private[sql](
    */
   def collect(): Array[T] = withAction("collect", queryExecution)(collectFromPlan)
 
+  // An internal version of `collect`, which won't set execution id and trigger listeners.
+  private[sql] def collectInternal(): Array[T] = {
+    collectFromPlan(queryExecution.executedPlan)
+  }
+
   /**
   * Returns a Java list that contains all rows in this Dataset.
   *
@@ -2518,6 +2545,11 @@ class Dataset[T] private[sql](
     plan.executeCollect().head.getLong(0)
   }
 
+  // An internal version of `count`, which won't set execution id and trigger listeners.
+  private[sql] def countInternal(): Long = {
+    groupBy().count().queryExecution.executedPlan.executeCollect().head.getLong(0)
+  }
+
  /**
   * Returns a new Dataset that has exactly `numPartitions` partitions.
   *
@@ -2763,7 +2795,7 @@ class Dataset[T] private[sql](
     createTempViewCommand(viewName, replace = true, global = true)
   }
 
-  private def createTempViewCommand(
+  private[spark] def createTempViewCommand(
      viewName: String,
      replace: Boolean,
      global: Boolean): CreateViewCommand = {
@@ -2954,17 +2986,17 @@ class Dataset[T] private[sql](
   }
 
  /** A convenient function to wrap a logical plan and produce a DataFrame. */
-  @inline private def withPlan(logicalPlan: => LogicalPlan): DataFrame = {
+  @inline private def withPlan(logicalPlan: LogicalPlan): DataFrame = {
    Dataset.ofRows(sparkSession, logicalPlan)
  }
 
  /** A convenient function to wrap a logical plan and produce a Dataset. */
-  @inline private def withTypedPlan[U : Encoder](logicalPlan: => LogicalPlan): Dataset[U] = {
+  @inline private def withTypedPlan[U : Encoder](logicalPlan: LogicalPlan): Dataset[U] = {
    Dataset(sparkSession, logicalPlan)
  }
 
  /** A convenient function to wrap a set based logical plan and produce a Dataset. */
-  @inline private def withSetOperator[U : Encoder](logicalPlan: => LogicalPlan): Dataset[U] = {
+  @inline private def withSetOperator[U : Encoder](logicalPlan: LogicalPlan): Dataset[U] = {
    if (classTag.runtimeClass.isAssignableFrom(classOf[Row])) {
      // Set operators widen types (change the schema), so we cannot reuse the row encoder.
      Dataset.ofRows(sparkSession, logicalPlan).asInstanceOf[Dataset[U]]
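
The new `takeInternal` / `collectInternal` / `countInternal` / `showInternal` variants exist so that code already running under an execution ID, such as a command triggered through `DataFrameWriter.runCommand`, can pull results without opening a nested execution or firing listener events. A rough usage sketch; the helper object is hypothetical and is placed in the `org.apache.spark.sql` package only so the `private[sql]` members are visible:

```scala
package org.apache.spark.sql

// Hypothetical helper, not part of this commit.
object InternalActionExample {
  // Called from code that is already running under an execution ID:
  // collectInternal() reuses that ID instead of opening a nested execution,
  // so the SQL tab keeps showing a single entry for the whole user action.
  def firstColumn(df: DataFrame): Seq[Any] =
    df.collectInternal().toSeq.map(_.get(0))
}
```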

sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala

Lines changed: 4 additions & 3 deletions
@@ -113,10 +113,11 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
 
 
   /**
-   * Returns the result as a hive compatible sequence of strings. This is for testing only.
+   * Returns the result as a hive compatible sequence of strings. This is used in tests and
+   * `SparkSQLDriver` for CLI applications.
    */
   def hiveResultString(): Seq[String] = executedPlan match {
-    case ExecutedCommandExec(desc: DescribeTableCommand) =>
+    case ExecutedCommandExec(desc: DescribeTableCommand, _) =>
       // If it is a describe command for a Hive table, we want to have the output format
       // be similar with Hive.
       desc.run(sparkSession).map {
@@ -127,7 +128,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) {
        .mkString("\t")
     }
     // SHOW TABLES in Hive only output table names, while ours output database, table name, isTemp.
-    case command @ ExecutedCommandExec(s: ShowTablesCommand) if !s.isExtended =>
+    case command @ ExecutedCommandExec(s: ShowTablesCommand, _) if !s.isExtended =>
       command.executeCollect().map(_.getString(1))
     case other =>
       val result: Seq[Seq[Any]] = other.executeCollectPublic().map(_.toSeq).toSeq

sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala

Lines changed: 13 additions & 0 deletions
@@ -39,6 +39,19 @@ object SQLExecution {
     executionIdToQueryExecution.get(executionId)
   }
 
+  private val testing = sys.props.contains("spark.testing")
+
+  private[sql] def checkSQLExecutionId(sparkSession: SparkSession): Unit = {
+    // only throw an exception during tests. a missing execution ID should not fail a job.
+    if (testing && sparkSession.sparkContext.getLocalProperty(EXECUTION_ID_KEY) == null) {
+      // Attention testers: when a test fails with this exception, it means that the action that
+      // started execution of a query didn't call withNewExecutionId. The execution ID should be
+      // set by calling withNewExecutionId in the action that begins execution, like
+      // Dataset.collect or DataFrameWriter.insertInto.
+      throw new IllegalStateException("Execution ID should be set")
+    }
+  }
+
   /**
    * Wrap an action that will execute "queryExecution" to track all Spark jobs in the body so that
    * we can connect them with an execution.
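
The new helper is meant to be called from code that launches jobs without creating its own execution ID, as a test-only assertion that the caller set one. A minimal sketch of that usage; the write function and its enclosing object are hypothetical, placed under `org.apache.spark.sql.execution` only so the `private[sql]` helper is visible:

```scala
package org.apache.spark.sql.execution

import org.apache.spark.sql.SparkSession

// Hypothetical write path, not part of this commit.
object CheckExecutionIdExample {
  def writeRows(sparkSession: SparkSession, queryExecution: QueryExecution): Unit = {
    // Under spark.testing this fails fast if the action that triggered the
    // write forgot to wrap it in SQLExecution.withNewExecutionId.
    SQLExecution.checkSQLExecutionId(sparkSession)
    queryExecution.toRdd.foreachPartition { iter =>
      // write the rows of this partition somewhere (omitted in this sketch)
      iter.foreach(_ => ())
    }
  }
}
```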

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 1 addition & 1 deletion
@@ -346,7 +346,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
   // Can we automate these 'pass through' operations?
   object BasicOperators extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
-      case r: RunnableCommand => ExecutedCommandExec(r) :: Nil
+      case r: RunnableCommand => ExecutedCommandExec(r, r.children.map(planLater)) :: Nil
 
       case MemoryPlan(sink, output) =>
         val encoder = RowEncoder(sink.schema)

sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ case class InMemoryRelation(
     val batchStats: LongAccumulator = child.sqlContext.sparkContext.longAccumulator)
   extends logical.LeafNode with MultiInstanceRelation {
 
-  override protected def innerChildren: Seq[SparkPlan] = Seq(child)
+  override def innerChildren: Seq[SparkPlan] = Seq(child)
 
   override def producedAttributes: AttributeSet = outputSet
 