diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 18e7df7aca5aa..533c96a0832de 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -328,6 +328,8 @@ license: | - Since Spark 3.0, `SHOW TBLPROPERTIES` will cause `AnalysisException` if the table does not exist. In Spark version 2.4 and earlier, this scenario caused `NoSuchTableException`. Also, `SHOW TBLPROPERTIES` on a temporary view will cause `AnalysisException`. In Spark version 2.4 and earlier, it returned an empty result. + - Since Spark 3.0, `SHOW CREATE TABLE` will always return Spark DDL, even when the given table is a Hive serde table. For Hive DDL, please use the `SHOW CREATE TABLE AS SERDE` command instead. + ## Upgrading from Spark SQL 2.4.4 to 2.4.5 - Since Spark 2.4.5, `TRUNCATE TABLE` command tries to set back original permission and ACLs during re-creating the table/partition paths. To restore the behaviour of earlier versions, set `spark.sql.truncateTable.ignorePermissionAcl.enabled` to `true`. diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 358943fc484ca..6f2bb7a9a7536 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -210,7 +210,7 @@ statement | SHOW PARTITIONS multipartIdentifier partitionSpec? #showPartitions | SHOW identifier? FUNCTIONS (LIKE? (multipartIdentifier | pattern=STRING))? #showFunctions - | SHOW CREATE TABLE multipartIdentifier #showCreateTable + | SHOW CREATE TABLE multipartIdentifier (AS SERDE)? #showCreateTable | SHOW CURRENT NAMESPACE #showCurrentNamespace | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction | (DESC | DESCRIBE) namespace EXTENDED? diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 967eca77145e2..e9ad84472904d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3215,7 +3215,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Creates a [[ShowCreateTableStatement]] */ override def visitShowCreateTable(ctx: ShowCreateTableContext): LogicalPlan = withOrigin(ctx) { - ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier())) + ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier()), ctx.SERDE != null) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 44f7b4143926d..1e6b67bf78b70 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -389,7 +389,9 @@ case class LoadDataStatement( /** * A SHOW CREATE TABLE statement, as parsed from SQL.
*/ -case class ShowCreateTableStatement(tableName: Seq[String]) extends ParsedStatement +case class ShowCreateTableStatement( + tableName: Seq[String], + asSerde: Boolean = false) extends ParsedStatement /** * A CACHE TABLE statement, as parsed from SQL diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 12439fd94c679..486e7f1f84b46 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -378,10 +378,14 @@ class ResolveSessionCatalog( isOverwrite, partition) - case ShowCreateTableStatement(tbl) => + case ShowCreateTableStatement(tbl, asSerde) if !asSerde => val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE") ShowCreateTableCommand(v1TableName.asTableIdentifier) + case ShowCreateTableStatement(tbl, asSerde) if asSerde => + val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE AS SERDE") + ShowCreateTableAsSerdeCommand(v1TableName.asTableIdentifier) + case CacheTableStatement(tbl, plan, isLazy, options) => val v1TableName = parseV1Table(tbl, "CACHE TABLE") CacheTableCommand(v1TableName.asTableIdentifier, plan, isLazy, options) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index a92fbdf25975b..468ca505cce1f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.execution.datasources.v2.csv.CSVDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.json.JsonDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -1002,7 +1002,57 @@ case class ShowPartitionsCommand( } } -case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand { +/** + * Provides common utilities between `ShowCreateTableCommand` and `ShowCreateTableAsSerdeCommand`.
*/ +trait ShowCreateTableCommandBase { + + protected val table: TableIdentifier + + protected def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = { + if (metadata.tableType == EXTERNAL) { + metadata.storage.locationUri.foreach { location => + builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n" + } + } + } + + protected def showTableComment(metadata: CatalogTable, builder: StringBuilder): Unit = { + metadata + .comment + .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") + .foreach(builder.append) + } + + protected def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { + if (metadata.properties.nonEmpty) { + val props = metadata.properties.map { case (key, value) => + s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" + } + + builder ++= "TBLPROPERTIES " + builder ++= concatByMultiLines(props) + } + } + + + protected def concatByMultiLines(iter: Iterable[String]): String = { + iter.mkString("(\n ", ",\n ", ")\n") + } +} + +/** + * A command that shows the Spark DDL syntax that can be used to create a given table. + * For a Hive serde table, this command will generate Spark DDL that can be used to + * create a corresponding Spark table. + * + * The syntax of using this command in SQL is: + * {{{ + * SHOW CREATE TABLE [db_name.]table_name + * }}} + */ +case class ShowCreateTableCommand(table: TableIdentifier) + extends RunnableCommand with ShowCreateTableCommandBase { override val output: Seq[Attribute] = Seq( AttributeReference("createtab_stmt", StringType, nullable = false)() ) @@ -1017,16 +1067,154 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman // TODO: [SPARK-28692] unify this after we unify the // CREATE TABLE syntax for hive serde and data source table. - val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { - showCreateDataSourceTable(tableMetadata) + val metadata = if (DDLUtils.isDatasourceTable(tableMetadata)) { + tableMetadata } else { - showCreateHiveTable(tableMetadata) + // For a Hive serde table, we try to convert it to Spark DDL. + if (tableMetadata.unsupportedFeatures.nonEmpty) { + throw new AnalysisException( + "Failed to execute SHOW CREATE TABLE against table " + + s"${tableMetadata.identifier}, which is created by Hive and uses the " + + "following unsupported feature(s)\n" + + tableMetadata.unsupportedFeatures.map(" - " + _).mkString("\n") + ) + } + + if (tableMetadata.tableType == VIEW) { + throw new AnalysisException("Hive view isn't supported by SHOW CREATE TABLE") + } + + if ("true".equalsIgnoreCase(tableMetadata.properties.getOrElse("transactional", "false"))) { + throw new AnalysisException( + "SHOW CREATE TABLE doesn't support transactional Hive table") + } + + convertTableMetadata(tableMetadata) } + val stmt = showCreateDataSourceTable(metadata) + Seq(Row(stmt)) } } + private def convertTableMetadata(tableMetadata: CatalogTable): CatalogTable = { + val hiveSerde = HiveSerDe( + serde = tableMetadata.storage.serde, + inputFormat = tableMetadata.storage.inputFormat, + outputFormat = tableMetadata.storage.outputFormat) + + // Looking for the Spark data source that maps to the Hive serde. + // TODO: some Hive fileformat + row serde might be mapped to a Spark data source, e.g. CSV.
+ val source = HiveSerDe.serdeToSource(hiveSerde) + if (source.isEmpty) { + val builder = StringBuilder.newBuilder + hiveSerde.serde.foreach { serde => + builder ++= s" SERDE: $serde" + } + hiveSerde.inputFormat.foreach { format => + builder ++= s" INPUTFORMAT: $format" + } + hiveSerde.outputFormat.foreach { format => + builder ++= s" OUTPUTFORMAT: $format" + } + throw new AnalysisException( + "Failed to execute SHOW CREATE TABLE against table " + + s"${tableMetadata.identifier}, which is created by Hive and uses the " + + "following unsupported serde configuration\n" + + builder.toString() + ) + } else { + // TODO: should we keep Hive serde properties? + val newStorage = tableMetadata.storage.copy(properties = Map.empty) + tableMetadata.copy(provider = source, storage = newStorage) + } + } + + private def showDataSourceTableDataColumns( + metadata: CatalogTable, builder: StringBuilder): Unit = { + val columns = metadata.schema.fields.map(_.toDDL) + builder ++= concatByMultiLines(columns) + } + + private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = { + // For a data source table, there is a provider in the metadata. + // If it is a Hive table, we have already converted its metadata and filled in a provider. + builder ++= s"USING ${metadata.provider.get}\n" + + val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map { + case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'" + } + + if (dataSourceOptions.nonEmpty) { + builder ++= "OPTIONS " + builder ++= concatByMultiLines(dataSourceOptions) + } + } + + private def showDataSourceTableNonDataColumns( + metadata: CatalogTable, builder: StringBuilder): Unit = { + val partCols = metadata.partitionColumnNames + if (partCols.nonEmpty) { + builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n" + } + + metadata.bucketSpec.foreach { spec => + if (spec.bucketColumnNames.nonEmpty) { + builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n" + + if (spec.sortColumnNames.nonEmpty) { + builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n" + } + + builder ++= s"INTO ${spec.numBuckets} BUCKETS\n" + } + } + } + + private def showCreateDataSourceTable(metadata: CatalogTable): String = { + val builder = StringBuilder.newBuilder + + builder ++= s"CREATE TABLE ${table.quotedString} " + showDataSourceTableDataColumns(metadata, builder) + showDataSourceTableOptions(metadata, builder) + showDataSourceTableNonDataColumns(metadata, builder) + showTableComment(metadata, builder) + showTableLocation(metadata, builder) + showTableProperties(metadata, builder) + + builder.toString() + } +} + +/** + * This command generates Hive DDL for a Hive serde table. + * + * The syntax of using this command in SQL is: + * {{{ + * SHOW CREATE TABLE table_identifier AS SERDE; + * }}} + */ +case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) + extends RunnableCommand with ShowCreateTableCommandBase { + override val output: Seq[Attribute] = Seq( + AttributeReference("createtab_stmt", StringType, nullable = false)() + ) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val catalog = sparkSession.sessionState.catalog + val tableMetadata = catalog.getTableMetadata(table) + + val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { + throw new AnalysisException( + s"$table is a Spark data source table.
Use `SHOW CREATE TABLE` without `AS SERDE` instead.") + } else { + showCreateHiveTable(tableMetadata) + } + + Seq(Row(stmt)) + } + private def showCreateHiveTable(metadata: CatalogTable): String = { def reportUnsupportedError(features: Seq[String]): Unit = { throw new AnalysisException( @@ -1055,12 +1243,12 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman if (metadata.tableType == VIEW) { showViewDataColumns(metadata, builder) - showComment(metadata, builder) + showTableComment(metadata, builder) showViewProperties(metadata, builder) showViewText(metadata, builder) } else { showHiveTableHeader(metadata, builder) - showComment(metadata, builder) + showTableComment(metadata, builder) showHiveTableNonDataColumns(metadata, builder) showHiveTableStorageInfo(metadata, builder) showTableLocation(metadata, builder) @@ -1084,10 +1272,6 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman } } - private def concatByMultiLines(iter: Iterable[String]): String = { - iter.mkString("(\n ", ",\n ", ")\n") - } - private def showViewProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { val viewProps = metadata.properties.filterKeys(!_.startsWith(CatalogTable.VIEW_PREFIX)) if (viewProps.nonEmpty) { @@ -1136,7 +1320,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman storage.serde.foreach { serde => builder ++= s"ROW FORMAT SERDE '$serde'\n" - val serdeProps = metadata.storage.properties.map { + val serdeProps = SQLConf.get.redactOptions(metadata.storage.properties).map { case (key, value) => s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" } @@ -1156,81 +1340,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman } } } - - private def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = { - if (metadata.tableType == EXTERNAL) { - metadata.storage.locationUri.foreach { location => - builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n" - } - } - } - - private def showComment(metadata: CatalogTable, builder: StringBuilder): Unit = { - metadata - .comment - .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") - .foreach(builder.append) - } - - private def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { - if (metadata.properties.nonEmpty) { - val props = metadata.properties.map { case (key, value) => - s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" - } - - builder ++= s"TBLPROPERTIES ${concatByMultiLines(props)}" - } - } - - private def showCreateDataSourceTable(metadata: CatalogTable): String = { - val builder = StringBuilder.newBuilder - - builder ++= s"CREATE TABLE ${table.quotedString} " - showDataSourceTableDataColumns(metadata, builder) - showDataSourceTableOptions(metadata, builder) - showDataSourceTableNonDataColumns(metadata, builder) - showComment(metadata, builder) - showTableLocation(metadata, builder) - showTableProperties(metadata, builder) - - builder.toString() - } - - private def showDataSourceTableDataColumns( - metadata: CatalogTable, builder: StringBuilder): Unit = { - val columns = metadata.schema.fields.map(_.toDDL) - builder ++= concatByMultiLines(columns) - } - - private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = { - builder ++= s"USING ${metadata.provider.get}\n" - - val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map { - 
case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'" - } - - if (dataSourceOptions.nonEmpty) { - builder ++= s"OPTIONS ${concatByMultiLines(dataSourceOptions)}" - } - } - - private def showDataSourceTableNonDataColumns( - metadata: CatalogTable, builder: StringBuilder): Unit = { - val partCols = metadata.partitionColumnNames - if (partCols.nonEmpty) { - builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n" - } - - metadata.bucketSpec.foreach { spec => - if (spec.bucketColumnNames.nonEmpty) { - builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n" - - if (spec.sortColumnNames.nonEmpty) { - builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n" - } - - builder ++= s"INTO ${spec.numBuckets} BUCKETS\n" - } - } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala index 4921e3ca903c4..64b7e7fe7923a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala @@ -65,6 +65,14 @@ object HiveSerDe { outputFormat = Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"), serde = Option("org.apache.hadoop.hive.serde2.avro.AvroSerDe"))) + // The `HiveSerDe` values in `serdeMap` should be distinct. + val serdeInverseMap: Map[HiveSerDe, String] = serdeMap.flatMap { + case ("sequencefile", _) => None + case ("rcfile", _) => None + case ("textfile", serde) => Some((serde, "text")) + case pair => Some(pair.swap) + } + /** * Get the Hive SerDe information from the data source abbreviation string or classname. * @@ -88,6 +96,14 @@ object HiveSerDe { serdeMap.get(key) } + /** + * Get the Spark data source name from the Hive SerDe information. + * + * @param serde Hive SerDe information. + * @return Spark data source name associated with the specified Hive SerDe. + */ + def serdeToSource(serde: HiveSerDe): Option[String] = serdeInverseMap.get(serde) + def getDefaultStorage(conf: SQLConf): CatalogStorageFormat = { // To respect hive-site.xml, it peeks Hadoop configuration from existing Spark session, // as an easy workaround. See SPARK-27555.
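Note on the inverse mapping above: `serdeInverseMap` is intentionally partial. `sequencefile` and `rcfile` have no Spark data source to map back to, and the `textfile` entry maps to the `text` data source rather than back to its own key. A minimal sketch of the round trips this gives the new `serdeToSource` helper (illustrative assertions only, assuming the patched `HiveSerDe` object is on the classpath):

```scala
import org.apache.spark.sql.internal.HiveSerDe

// Ordinary entries simply swap back to their data source name.
val parquetSerde = HiveSerDe.sourceToSerDe("parquet").get
assert(HiveSerDe.serdeToSource(parquetSerde).contains("parquet"))

// "textfile" is keyed by its Hive name in `serdeMap`, but the matching
// Spark data source is called "text", so the inverse map returns "text".
val textSerde = HiveSerDe.sourceToSerDe("textfile").get
assert(HiveSerDe.serdeToSource(textSerde).contains("text"))

// "rcfile" (like "sequencefile") is excluded, so the lookup returns None
// and SHOW CREATE TABLE reports an unsupported serde configuration.
val rcfileSerde = HiveSerDe.sourceToSerDe("rcfile").get
assert(HiveSerDe.serdeToSource(rcfileSerde).isEmpty)
```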
diff --git a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql index ccb40f8d991b4..dc77f87d9743a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql @@ -73,7 +73,7 @@ CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet; CREATE VIEW view_SPARK_30302 (aaa, bbb) AS SELECT a, b FROM tbl; -SHOW CREATE TABLE view_SPARK_30302; +SHOW CREATE TABLE view_SPARK_30302 AS SERDE; DROP VIEW view_SPARK_30302; @@ -82,7 +82,7 @@ CREATE VIEW view_SPARK_30302 (aaa COMMENT 'comment with \'quoted text\' for aaa' COMMENT 'This is a comment with \'quoted text\' for view' AS SELECT a, b FROM tbl; -SHOW CREATE TABLE view_SPARK_30302; +SHOW CREATE TABLE view_SPARK_30302 AS SERDE; DROP VIEW view_SPARK_30302; @@ -91,6 +91,13 @@ CREATE VIEW view_SPARK_30302 (aaa, bbb) TBLPROPERTIES ('a' = '1', 'b' = '2') AS SELECT a, b FROM tbl; +SHOW CREATE TABLE view_SPARK_30302 AS SERDE; +DROP VIEW view_SPARK_30302; + +-- SHOW CREATE TABLE does not support view +CREATE VIEW view_SPARK_30302 (aaa, bbb) +AS SELECT a, b FROM tbl; + SHOW CREATE TABLE view_SPARK_30302; DROP VIEW view_SPARK_30302; diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index 5771f218e3b57..e8ee07171651d 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 38 +-- Number of queries: 41 -- !query @@ -291,7 +291,7 @@ struct<> -- !query -SHOW CREATE TABLE view_SPARK_30302 +SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output @@ -320,7 +320,7 @@ struct<> -- !query -SHOW CREATE TABLE view_SPARK_30302 +SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output @@ -350,7 +350,7 @@ struct<> -- !query -SHOW CREATE TABLE view_SPARK_30302 +SHOW CREATE TABLE view_SPARK_30302 AS SERDE -- !query schema struct -- !query output @@ -371,6 +371,32 @@ struct<> +-- !query +CREATE VIEW view_SPARK_30302 (aaa, bbb) +AS SELECT a, b FROM tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CREATE TABLE view_SPARK_30302 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Hive view isn't supported by SHOW CREATE TABLE; + + +-- !query +DROP VIEW view_SPARK_30302 +-- !query schema +struct<> +-- !query output + + + -- !query DROP TABLE tbl -- !query schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index d7b489e4fa07d..b3b94f8be0d17 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -148,20 +148,6 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { } } - test("view") { - withView("v1") { - sql("CREATE VIEW v1 AS SELECT 1 AS a") - checkCreateView("v1") - } - } - - test("view with output columns") { - withView("v1") { - sql("CREATE VIEW v1 (b) AS SELECT 1 AS a") - checkCreateView("v1") - } - } - test("temp view") { val viewName = "spark_28383" withTempView(viewName) { @@ -225,7 +211,7 @@ abstract class ShowCreateTableSuite extends QueryTest with 
SQLTestUtils { } } - private def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { + protected def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { def normalize(table: CatalogTable): CatalogTable = { val nondeterministicProps = Set( "CreateTime", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index a551ab704b62a..e5d572c90af38 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.hive import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { @@ -38,6 +40,20 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet super.afterAll() } + test("view") { + withView("v1") { + sql("CREATE VIEW v1 AS SELECT 1 AS a") + checkCreateHiveTableOrView("v1", "VIEW") + } + } + + test("view with output columns") { + withView("v1") { + sql("CREATE VIEW v1 (b) AS SELECT 1 AS a") + checkCreateHiveTableOrView("v1", "VIEW") + } + } + test("simple hive table") { withTable("t1") { sql( @@ -52,7 +68,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -72,7 +88,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } } @@ -92,7 +108,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -110,7 +126,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -125,7 +141,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -147,7 +163,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -160,7 +176,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet |INTO 2 BUCKETS """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -188,18 +204,44 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet } assert(cause.getMessage.contains(" - partitioned view")) + + val causeForSpark = intercept[AnalysisException] { + sql("SHOW CREATE TABLE v1 AS SERDE") + } + + assert(causeForSpark.getMessage.contains(" - partitioned view")) } } } test("SPARK-24911: keep quotes for nested fields in hive") { withTable("t1") { - val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>) USING hive" + val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive" sql(createTable) val shownDDL = 
getShowDDL("SHOW CREATE TABLE t1") assert(shownDDL == createTable.dropRight(" USING hive".length)) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") + } + } + + /** + * This method compares the given table with the table created by the DDL generated by + * `SHOW CREATE TABLE AS SERDE`. + */ + private def checkCreateHiveTableOrView(tableName: String, checkType: String = "TABLE"): Unit = { + val table = TableIdentifier(tableName, Some("default")) + val db = table.database.getOrElse("default") + val expected = spark.sharedState.externalCatalog.getTable(db, table.table) + val shownDDL = sql(s"SHOW CREATE TABLE ${table.quotedString} AS SERDE").head().getString(0) + sql(s"DROP $checkType ${table.quotedString}") + + try { + sql(shownDDL) + val actual = spark.sharedState.externalCatalog.getTable(db, table.table) + checkCatalogTables(expected, actual) + } finally { + sql(s"DROP $checkType IF EXISTS ${table.table}") } } @@ -207,4 +249,269 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] .client.runSqlHive(ddl) } + + private def checkCreateSparkTableAsHive(tableName: String): Unit = { + val table = TableIdentifier(tableName, Some("default")) + val db = table.database.get + val hiveTable = spark.sharedState.externalCatalog.getTable(db, table.table) + val sparkDDL = sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0) + // Drops original Hive table. + sql(s"DROP TABLE ${table.quotedString}") + + try { + // Creates Spark datasource table using generated Spark DDL. + sql(sparkDDL) + val sparkTable = spark.sharedState.externalCatalog.getTable(db, table.table) + checkHiveCatalogTables(hiveTable, sparkTable) + } finally { + sql(s"DROP TABLE IF EXISTS ${table.table}") + } + } + + private def checkHiveCatalogTables(hiveTable: CatalogTable, sparkTable: CatalogTable): Unit = { + def normalize(table: CatalogTable): CatalogTable = { + val nondeterministicProps = Set( + "CreateTime", + "transient_lastDdlTime", + "grantTime", + "lastUpdateTime", + "last_modified_by", + "last_modified_time", + "Owner:", + // The following are hive specific schema parameters which we do not need to match exactly. 
+ "totalNumberFiles", + "maxFileSize", + "minFileSize" + ) + + table.copy( + createTime = 0L, + lastAccessTime = 0L, + properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), + stats = None, + ignoredProperties = Map.empty, + storage = table.storage.copy(properties = Map.empty), + provider = None, + tracksPartitionsInCatalog = false + ) + } + + def fillSerdeFromProvider(table: CatalogTable): CatalogTable = { + table.provider.flatMap(HiveSerDe.sourceToSerDe(_)).map { hiveSerde => + val newStorage = table.storage.copy( + inputFormat = hiveSerde.inputFormat, + outputFormat = hiveSerde.outputFormat, + serde = hiveSerde.serde + ) + table.copy(storage = newStorage) + }.getOrElse(table) + } + + assert(normalize(fillSerdeFromProvider(sparkTable)) == normalize(hiveTable)) + } + + test("simple hive table in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |TBLPROPERTIES ( + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + |STORED AS orc + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("show create table as serde can't work on data source table") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |USING orc + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + checkCreateHiveTableOrView("t1") + } + + assert(cause.getMessage.contains("Use `SHOW CREATE TABLE` without `AS SERDE` instead")) + } + } + + test("simple external hive table in Spark DDL") { + withTempDir { dir => + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |LOCATION '${dir.toURI}' + |TBLPROPERTIES ( + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + |STORED AS orc + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + } + + test("hive table with STORED AS clause in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |STORED AS PARQUET + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("hive table with nested fields with STORED AS clause in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING, + | c3 STRUCT + |) + |STORED AS PARQUET + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("hive table with unsupported fileformat in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |STORED AS RCFILE + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + checkCreateSparkTableAsHive("t1") + } + + assert(cause.getMessage.contains("unsupported serde configuration")) + } + } + + test("hive table with serde info in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("hive view is not supported by show create table without as serde") { + withTable("t1") { + withView("v1") { + sql("CREATE TABLE t1 (c1 STRING, c2 STRING)") + + createRawHiveTable( + s""" + |CREATE VIEW v1 + |AS SELECT * from t1 + """.stripMargin + ) + + val cause = 
intercept[AnalysisException] { + sql("SHOW CREATE TABLE v1") + } + + assert(cause.getMessage.contains("view isn't supported")) + } + } + } + + test("partitioned, bucketed hive table in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | emp_id INT COMMENT 'employee id', emp_name STRING, + | emp_dob STRING COMMENT 'employee date of birth', emp_sex STRING COMMENT 'M/F' + |) + |COMMENT 'employee table' + |PARTITIONED BY ( + | emp_country STRING COMMENT '2-char code', emp_state STRING COMMENT '2-char code' + |) + |CLUSTERED BY (emp_sex) SORTED BY (emp_id ASC) INTO 10 BUCKETS + |STORED AS ORC + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("show create table for transactional hive table") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |TBLPROPERTIES ( + | 'transactional' = 'true', + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + |CLUSTERED BY (c1) INTO 10 BUCKETS + |STORED AS ORC + """.stripMargin + ) + + + val cause = intercept[AnalysisException] { + sql("SHOW CREATE TABLE t1") + } + + assert(cause.getMessage.contains( + "SHOW CREATE TABLE doesn't support transactional Hive table")) + } + } }
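To illustrate the user-facing behavior change end to end, here is a hypothetical session (a sketch only; it assumes a Hive-enabled `SparkSession` bound to `spark`, and the table name is illustrative, not taken from the patch):

```scala
// A Hive serde table.
spark.sql("CREATE TABLE t1 (c1 INT COMMENT 'bla', c2 STRING) STORED AS PARQUET")

// With this patch, plain SHOW CREATE TABLE returns Spark DDL even for a Hive
// serde table, along the lines of "CREATE TABLE `default`.`t1` ( ... ) USING parquet".
val sparkDdl = spark.sql("SHOW CREATE TABLE t1").head().getString(0)

// The new AS SERDE variant keeps the old behavior and returns Hive DDL,
// including ROW FORMAT SERDE / STORED AS INPUTFORMAT ... OUTPUTFORMAT ... clauses.
val hiveDdl = spark.sql("SHOW CREATE TABLE t1 AS SERDE").head().getString(0)

// Conversely, AS SERDE on a table that is already a Spark data source table
// fails with "Use `SHOW CREATE TABLE` without `AS SERDE` instead."
```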