docs/sql-migration-guide.md (2 additions, 0 deletions)
@@ -328,6 +328,8 @@ license: |

- Since Spark 3.0, `SHOW TBLPROPERTIES` will cause `AnalysisException` if the table does not exist. In Spark version 2.4 and earlier, this scenario caused `NoSuchTableException`. Also, `SHOW TBLPROPERTIES` on a temporary view will cause `AnalysisException`. In Spark version 2.4 and earlier, it returned an empty result.

- Since Spark 3.0, `SHOW CREATE TABLE` will always return Spark DDL, even when the given table is a Hive serde table. For generating Hive DDL, please use the `SHOW CREATE TABLE ... AS SERDE` command instead.
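
  A quick sketch of the new split in behavior, run in a Hive-enabled `spark-shell` (the table name is hypothetical):

  ```scala
  // Sketch: `hive_tbl` is a hypothetical Hive serde table (e.g. STORED AS ORC).
  spark.sql("SHOW CREATE TABLE hive_tbl").show(truncate = false)
  // -> Spark DDL, e.g. CREATE TABLE `default`.`hive_tbl` ( ... ) USING orc

  spark.sql("SHOW CREATE TABLE hive_tbl AS SERDE").show(truncate = false)
  // -> Hive DDL, e.g. CREATE TABLE `default`.`hive_tbl`( ... ) ROW FORMAT SERDE ... STORED AS ...
  ```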

> Review (Member): For Hive DDL -> For generating Hive DDL?

## Upgrading from Spark SQL 2.4.4 to 2.4.5

- Since Spark 2.4.5, the `TRUNCATE TABLE` command tries to restore the original permissions and ACLs when re-creating the table/partition paths. To restore the behaviour of earlier versions, set `spark.sql.truncateTable.ignorePermissionAcl.enabled` to `true`.
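
  For example, a minimal sketch of opting back into the old behaviour (`my_tbl` is a hypothetical table):

  ```scala
  // Sketch: with the flag on, Spark does not try to restore permissions/ACLs
  // on the re-created paths (the pre-2.4.5 behaviour).
  spark.conf.set("spark.sql.truncateTable.ignorePermissionAcl.enabled", "true")
  spark.sql("TRUNCATE TABLE my_tbl")
  ```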
@@ -210,7 +210,7 @@ statement
| SHOW PARTITIONS multipartIdentifier partitionSpec? #showPartitions
| SHOW identifier? FUNCTIONS
(LIKE? (multipartIdentifier | pattern=STRING))? #showFunctions
- | SHOW CREATE TABLE multipartIdentifier #showCreateTable
+ | SHOW CREATE TABLE multipartIdentifier (AS SERDE)? #showCreateTable
| SHOW CURRENT NAMESPACE #showCurrentNamespace
| (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction
| (DESC | DESCRIBE) namespace EXTENDED?
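With the optional `(AS SERDE)?` token, both forms below now parse to the `#showCreateTable` alternative (a sketch; identifiers are hypothetical):

```scala
// Both statements hit the same grammar rule; the parser only records
// whether the optional AS SERDE suffix was present.
spark.sql("SHOW CREATE TABLE db1.tbl1")          // Spark DDL output
spark.sql("SHOW CREATE TABLE db1.tbl1 AS SERDE") // Hive DDL output
```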
@@ -3215,7 +3215,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
* Creates a [[ShowCreateTableStatement]]
*/
override def visitShowCreateTable(ctx: ShowCreateTableContext): LogicalPlan = withOrigin(ctx) {
- ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier()))
+ ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier()), ctx.SERDE != null)
}
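
A minimal sketch of what the visitor now produces, using Catalyst's parser directly (the same entry point Spark's parser test suites use):

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

// Sketch: asSerde is derived from whether the SERDE token matched.
CatalystSqlParser.parsePlan("SHOW CREATE TABLE a.b.c")
// -> ShowCreateTableStatement(Seq("a", "b", "c"), asSerde = false)
CatalystSqlParser.parsePlan("SHOW CREATE TABLE a.b.c AS SERDE")
// -> ShowCreateTableStatement(Seq("a", "b", "c"), asSerde = true)
```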

/**
@@ -389,7 +389,9 @@ case class LoadDataStatement(
/**
* A SHOW CREATE TABLE statement, as parsed from SQL.
*/
- case class ShowCreateTableStatement(tableName: Seq[String]) extends ParsedStatement
+ case class ShowCreateTableStatement(
+ tableName: Seq[String],
+ asSerde: Boolean = false) extends ParsedStatement

/**
* A CACHE TABLE statement, as parsed from SQL
@@ -378,10 +378,14 @@ class ResolveSessionCatalog(
isOverwrite,
partition)

- case ShowCreateTableStatement(tbl) =>
+ case ShowCreateTableStatement(tbl, asSerde) if !asSerde =>
val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE")
ShowCreateTableCommand(v1TableName.asTableIdentifier)

case ShowCreateTableStatement(tbl, asSerde) if asSerde =>
val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE AS SERDE")
ShowCreateTableAsSerdeCommand(v1TableName.asTableIdentifier)

case CacheTableStatement(tbl, plan, isLazy, options) =>
val v1TableName = parseV1Table(tbl, "CACHE TABLE")
CacheTableCommand(v1TableName.asTableIdentifier, plan, isLazy, options)
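
On the two `ShowCreateTableStatement` cases above: since `asSerde` is a plain `Boolean`, the guards are equivalent to matching the literal values. A self-contained sketch with stand-in types (illustrative names, not Spark's actual signatures):

```scala
// Stand-in model of the dispatch above; everything here is illustrative.
case class ShowCreateTableStatement(tableName: Seq[String], asSerde: Boolean = false)

def resolve(stmt: ShowCreateTableStatement): String = stmt match {
  case ShowCreateTableStatement(tbl, false) => s"ShowCreateTableCommand(${tbl.mkString(".")})"
  case ShowCreateTableStatement(tbl, true) => s"ShowCreateTableAsSerdeCommand(${tbl.mkString(".")})"
}

resolve(ShowCreateTableStatement(Seq("db", "t")))                 // ShowCreateTableCommand(db.t)
resolve(ShowCreateTableStatement(Seq("db", "t"), asSerde = true)) // ShowCreateTableAsSerdeCommand(db.t)
```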
@@ -44,7 +44,7 @@ import org.apache.spark.sql.execution.datasources.v2.csv.CSVDataSourceV2
import org.apache.spark.sql.execution.datasources.v2.json.JsonDataSourceV2
import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2
- import org.apache.spark.sql.internal.SQLConf
+ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.SchemaUtils

@@ -1002,7 +1002,57 @@ case class ShowPartitionsCommand(
}
}

- case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand {
/**
* Provides common utilities between `ShowCreateTableCommand` and `ShowCreateTableAsSerdeCommand`.
*/
trait ShowCreateTableCommandBase {

protected val table: TableIdentifier

protected def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.tableType == EXTERNAL) {
metadata.storage.locationUri.foreach { location =>
builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n"
}
}
}

protected def showTableComment(metadata: CatalogTable, builder: StringBuilder): Unit = {
metadata
.comment
.map("COMMENT '" + escapeSingleQuotedString(_) + "'\n")
.foreach(builder.append)
}

protected def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.properties.nonEmpty) {
val props = metadata.properties.map { case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}

builder ++= "TBLPROPERTIES "
builder ++= concatByMultiLines(props)
}
}

protected def concatByMultiLines(iter: Iterable[String]): String = {
iter.mkString("(\n ", ",\n ", ")\n")
}
}
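
For reference, `concatByMultiLines` produces the one-entry-per-line parenthesized layout used throughout the generated DDL; a standalone sketch of its output shape:

```scala
// Standalone copy of the helper above, purely to show its output.
def concatByMultiLines(iter: Iterable[String]): String =
  iter.mkString("(\n ", ",\n ", ")\n")

print("TBLPROPERTIES " + concatByMultiLines(Seq("'k1' = 'v1'", "'k2' = 'v2'")))
// TBLPROPERTIES (
//  'k1' = 'v1',
//  'k2' = 'v2')
```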

/**
* A command that shows the Spark DDL syntax that can be used to create a given table.
* For a Hive serde table, this command generates Spark DDL that can be used to
* create an equivalent Spark table.
*
* The syntax of using this command in SQL is:
* {{{
* SHOW CREATE TABLE [db_name.]table_name
* }}}
*/
case class ShowCreateTableCommand(table: TableIdentifier)
extends RunnableCommand with ShowCreateTableCommandBase {
override val output: Seq[Attribute] = Seq(
AttributeReference("createtab_stmt", StringType, nullable = false)()
)
@@ -1017,16 +1067,154 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand

// TODO: [SPARK-28692] unify this after we unify the
// CREATE TABLE syntax for hive serde and data source table.
- val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) {
- showCreateDataSourceTable(tableMetadata)
+ val metadata = if (DDLUtils.isDatasourceTable(tableMetadata)) {
+ tableMetadata
} else {
- showCreateHiveTable(tableMetadata)
+ // For a Hive serde table, we try to convert it to Spark DDL.
if (tableMetadata.unsupportedFeatures.nonEmpty) {
throw new AnalysisException(
"Failed to execute SHOW CREATE TABLE against table " +
s"${tableMetadata.identifier}, which is created by Hive and uses the " +
"following unsupported feature(s)\n" +
tableMetadata.unsupportedFeatures.map(" - " + _).mkString("\n")
)
}

> Review thread on this error message:
> - Contributor: This error message is not useful to users, as they don't know what to do to make their query work again in 3.0. Can we follow https://github.com/apache/spark/pull/24938/files#diff-a53c8b7022d13417a2ef33372464f9b5R1210 and ask users to run SHOW CREATE TABLE with AS SERDE?
> - Author: OK. It is too late now in my timezone. I will create a follow-up tomorrow.
> - Contributor: cool, thanks!
> - Member: Can we improve the exception message? Let end users know what the new syntax for creating a Hive serde table is.
> - Author: OK. Let me create a follow-up.

if (tableMetadata.tableType == VIEW) {
throw new AnalysisException("Hive view isn't supported by SHOW CREATE TABLE")
}

> Review thread on the Hive view branch:
> - Contributor: ditto (the same concern about the error message).
> - Member: Can we just create a Spark view?
> - Author: That requires more than a simple message/doc change. Is this required to be in 3.0.0 too? If so, how much time do we have?

if ("true".equalsIgnoreCase(tableMetadata.properties.getOrElse("transactional", "false"))) {
throw new AnalysisException(
"SHOW CREATE TABLE doesn't support transactional Hive table")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same here. Let end users know what are the workaround, i.e., new syntax.

}

convertTableMetadata(tableMetadata)
}

val stmt = showCreateDataSourceTable(metadata)

Seq(Row(stmt))
}
}

private def convertTableMetadata(tableMetadata: CatalogTable): CatalogTable = {
val hiveSerde = HiveSerDe(
serde = tableMetadata.storage.serde,
inputFormat = tableMetadata.storage.inputFormat,
outputFormat = tableMetadata.storage.outputFormat)

// Look for a Spark data source that maps to the Hive serde.
// TODO: some Hive fileformat + row serde might be mapped to Spark data source, e.g. CSV.
val source = HiveSerDe.serdeToSource(hiveSerde)
if (source.isEmpty) {
val builder = StringBuilder.newBuilder
hiveSerde.serde.foreach { serde =>
builder ++= s" SERDE: $serde"
}
hiveSerde.inputFormat.foreach { format =>
builder ++= s" INPUTFORMAT: $format"
}
hiveSerde.outputFormat.foreach { format =>
builder ++= s" OUTPUTFORMAT: $format"
}
throw new AnalysisException(
"Failed to execute SHOW CREATE TABLE against table " +
s"${tableMetadata.identifier}, which is created by Hive and uses the " +
"following unsupported serde configuration\n" +
builder.toString()
)
} else {
// TODO: should we keep Hive serde properties?
val newStorage = tableMetadata.storage.copy(properties = Map.empty)
tableMetadata.copy(provider = source, storage = newStorage)
}
}
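
As a concrete sketch of the lookup, the stock Hive Parquet serde triple should map back to the built-in `parquet` source (class names are Hive's standard ones; the authoritative mapping lives in `HiveSerDe`):

```scala
import org.apache.spark.sql.internal.HiveSerDe

// Sketch: the (inputFormat, outputFormat, serde) triple of a Hive Parquet table.
val parquetSerde = HiveSerDe(
  inputFormat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"),
  outputFormat = Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"),
  serde = Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))

HiveSerDe.serdeToSource(parquetSerde) // expected: Some("parquet")
```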

private def showDataSourceTableDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val columns = metadata.schema.fields.map(_.toDDL)
builder ++= concatByMultiLines(columns)
}

private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = {
// For a data source table, a provider is always present in the metadata.
// For a Hive serde table, we have already converted its metadata and filled in a provider.
builder ++= s"USING ${metadata.provider.get}\n"

val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map {
case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'"
}

if (dataSourceOptions.nonEmpty) {
builder ++= "OPTIONS "
builder ++= concatByMultiLines(dataSourceOptions)
}
}

> Review thread:
> - gengliangwang (Member), Jan 31, 2020: Nit: it would be better to add comments or an assertion here to explain that the provider is always defined.
> - Author: OK. Added comments for that.
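
`redactOptions` masks option values whose keys match the configured redaction patterns (`spark.sql.redaction.options.regex`, plus the core `spark.redaction.regex`, which covers keys like `password` by default); a sketch:

```scala
import org.apache.spark.sql.internal.SQLConf

// Sketch: secret-looking keys are masked before the options reach the DDL output.
val opts = Map("path" -> "/tmp/t", "password" -> "hunter2")
SQLConf.get.redactOptions(opts)
// expected: Map(path -> /tmp/t, password -> *********(redacted))
```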

private def showDataSourceTableNonDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val partCols = metadata.partitionColumnNames
if (partCols.nonEmpty) {
builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n"
}

metadata.bucketSpec.foreach { spec =>
if (spec.bucketColumnNames.nonEmpty) {
builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n"

if (spec.sortColumnNames.nonEmpty) {
builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n"
}

builder ++= s"INTO ${spec.numBuckets} BUCKETS\n"
}
}
}

private def showCreateDataSourceTable(metadata: CatalogTable): String = {
val builder = StringBuilder.newBuilder

builder ++= s"CREATE TABLE ${table.quotedString} "
showDataSourceTableDataColumns(metadata, builder)
showDataSourceTableOptions(metadata, builder)
showDataSourceTableNonDataColumns(metadata, builder)
showTableComment(metadata, builder)
showTableLocation(metadata, builder)
showTableProperties(metadata, builder)

builder.toString()
}
}
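
End to end, the command returns a single `createtab_stmt` row; a sketch against a hypothetical data source table:

```scala
// Sketch, in a spark-shell session; `tbl` is a hypothetical table.
spark.sql("CREATE TABLE tbl (a INT, b STRING) USING parquet PARTITIONED BY (b)")
spark.sql("SHOW CREATE TABLE tbl").show(truncate = false)
// Roughly:
// CREATE TABLE `default`.`tbl` (
//  `a` INT,
//  `b` STRING)
// USING parquet
// PARTITIONED BY (b)
```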

/**
* A command that generates Hive DDL for a Hive serde table.
*
* The syntax of using this command in SQL is:
* {{{
* SHOW CREATE TABLE table_identifier AS SERDE;
* }}}
*/
case class ShowCreateTableAsSerdeCommand(table: TableIdentifier)
extends RunnableCommand with ShowCreateTableCommandBase {
override val output: Seq[Attribute] = Seq(
AttributeReference("createtab_stmt", StringType, nullable = false)()
)

override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
val tableMetadata = catalog.getTableMetadata(table)

val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) {
throw new AnalysisException(
s"$table is a Spark data source table. Use `SHOW CREATE TABLE` without `AS SERDE` instead.")
} else {
showCreateHiveTable(tableMetadata)
}

Seq(Row(stmt))
}

private def showCreateHiveTable(metadata: CatalogTable): String = {
def reportUnsupportedError(features: Seq[String]): Unit = {
throw new AnalysisException(
@@ -1055,12 +1243,12 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand

if (metadata.tableType == VIEW) {
showViewDataColumns(metadata, builder)
- showComment(metadata, builder)
+ showTableComment(metadata, builder)
showViewProperties(metadata, builder)
showViewText(metadata, builder)
} else {
showHiveTableHeader(metadata, builder)
- showComment(metadata, builder)
+ showTableComment(metadata, builder)
showHiveTableNonDataColumns(metadata, builder)
showHiveTableStorageInfo(metadata, builder)
showTableLocation(metadata, builder)
@@ -1084,10 +1272,6 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand
}
}

- private def concatByMultiLines(iter: Iterable[String]): String = {
- iter.mkString("(\n ", ",\n ", ")\n")
- }

private def showViewProperties(metadata: CatalogTable, builder: StringBuilder): Unit = {
val viewProps = metadata.properties.filterKeys(!_.startsWith(CatalogTable.VIEW_PREFIX))
if (viewProps.nonEmpty) {
@@ -1136,7 +1320,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand
storage.serde.foreach { serde =>
builder ++= s"ROW FORMAT SERDE '$serde'\n"

- val serdeProps = metadata.storage.properties.map {
+ val serdeProps = SQLConf.get.redactOptions(metadata.storage.properties).map {
case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}
@@ -1156,81 +1340,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand
}
}
}

private def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.tableType == EXTERNAL) {
metadata.storage.locationUri.foreach { location =>
builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n"
}
}
}

private def showComment(metadata: CatalogTable, builder: StringBuilder): Unit = {
metadata
.comment
.map("COMMENT '" + escapeSingleQuotedString(_) + "'\n")
.foreach(builder.append)
}

private def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = {
if (metadata.properties.nonEmpty) {
val props = metadata.properties.map { case (key, value) =>
s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'"
}

builder ++= s"TBLPROPERTIES ${concatByMultiLines(props)}"
}
}

private def showCreateDataSourceTable(metadata: CatalogTable): String = {
val builder = StringBuilder.newBuilder

builder ++= s"CREATE TABLE ${table.quotedString} "
showDataSourceTableDataColumns(metadata, builder)
showDataSourceTableOptions(metadata, builder)
showDataSourceTableNonDataColumns(metadata, builder)
showComment(metadata, builder)
showTableLocation(metadata, builder)
showTableProperties(metadata, builder)

builder.toString()
}

private def showDataSourceTableDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val columns = metadata.schema.fields.map(_.toDDL)
builder ++= concatByMultiLines(columns)
}

private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = {
builder ++= s"USING ${metadata.provider.get}\n"

val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map {
case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'"
}

if (dataSourceOptions.nonEmpty) {
builder ++= s"OPTIONS ${concatByMultiLines(dataSourceOptions)}"
}
}

private def showDataSourceTableNonDataColumns(
metadata: CatalogTable, builder: StringBuilder): Unit = {
val partCols = metadata.partitionColumnNames
if (partCols.nonEmpty) {
builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n"
}

metadata.bucketSpec.foreach { spec =>
if (spec.bucketColumnNames.nonEmpty) {
builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n"

if (spec.sortColumnNames.nonEmpty) {
builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n"
}

builder ++= s"INTO ${spec.numBuckets} BUCKETS\n"
}
}
}
}
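
And the `AS SERDE` counterpart, a sketch against a hypothetical Hive serde table (output abbreviated; exact serde/format classes depend on the table):

```scala
// Sketch, in a Hive-enabled session; `hive_tbl` is a hypothetical Hive ORC table.
spark.sql("CREATE TABLE hive_tbl (a INT) STORED AS ORC")
spark.sql("SHOW CREATE TABLE hive_tbl AS SERDE").show(truncate = false)
// Roughly:
// CREATE TABLE `default`.`hive_tbl`(`a` INT)
// ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
// STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
// OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
```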