-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-27946][SQL] Hive DDL to Spark DDL conversion USING "show create table" #24938
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7795355
b9dacc5
021e48b
a5e3a15
a909790
6fa7fab
5f92532
8dec28b
9228d6a
d444f29
bbe6fa7
4f33fd8
4dc4007
a4b0ce6
115dce3
88feda3
67cbea2
4311955
f886f04
9882932
27c76b3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,7 +44,7 @@ import org.apache.spark.sql.execution.datasources.v2.csv.CSVDataSourceV2 | |
| import org.apache.spark.sql.execution.datasources.v2.json.JsonDataSourceV2 | ||
| import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2 | ||
| import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.sql.util.SchemaUtils | ||
|
|
||
|
|
@@ -1002,7 +1002,57 @@ case class ShowPartitionsCommand( | |
| } | ||
| } | ||
|
|
||
| case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand { | ||
| /** | ||
| * Provides common utilities between `ShowCreateTableCommand` and `ShowCreateTableAsSparkCommand`. | ||
| */ | ||
| trait ShowCreateTableCommandBase { | ||
|
|
||
| protected val table: TableIdentifier | ||
|
|
||
| protected def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| if (metadata.tableType == EXTERNAL) { | ||
| metadata.storage.locationUri.foreach { location => | ||
| builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n" | ||
| } | ||
| } | ||
| } | ||
|
|
||
| protected def showTableComment(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| metadata | ||
| .comment | ||
| .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") | ||
| .foreach(builder.append) | ||
| } | ||
|
|
||
| protected def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| if (metadata.properties.nonEmpty) { | ||
| val props = metadata.properties.map { case (key, value) => | ||
| s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" | ||
| } | ||
|
|
||
| builder ++= "TBLPROPERTIES " | ||
| builder ++= concatByMultiLines(props) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| protected def concatByMultiLines(iter: Iterable[String]): String = { | ||
| iter.mkString("(\n ", ",\n ", ")\n") | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * A command that shows the Spark DDL syntax that can be used to create a given table. | ||
| * For Hive serde table, this command will generate Spark DDL that can be used to | ||
| * create corresponding Spark table. | ||
| * | ||
| * The syntax of using this command in SQL is: | ||
| * {{{ | ||
| * SHOW CREATE TABLE [db_name.]table_name | ||
| * }}} | ||
| */ | ||
| case class ShowCreateTableCommand(table: TableIdentifier) | ||
| extends RunnableCommand with ShowCreateTableCommandBase { | ||
| override val output: Seq[Attribute] = Seq( | ||
| AttributeReference("createtab_stmt", StringType, nullable = false)() | ||
| ) | ||
|
|
@@ -1017,16 +1067,154 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman | |
|
|
||
| // TODO: [SPARK-28692] unify this after we unify the | ||
| // CREATE TABLE syntax for hive serde and data source table. | ||
| val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { | ||
| showCreateDataSourceTable(tableMetadata) | ||
| val metadata = if (DDLUtils.isDatasourceTable(tableMetadata)) { | ||
| tableMetadata | ||
| } else { | ||
| showCreateHiveTable(tableMetadata) | ||
| // For a Hive serde table, we try to convert it to Spark DDL. | ||
| if (tableMetadata.unsupportedFeatures.nonEmpty) { | ||
| throw new AnalysisException( | ||
| "Failed to execute SHOW CREATE TABLE against table " + | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This error message is not useful to users as they don't know what to do to make their query work again in 3.0. Can we follow https://github.com/apache/spark/pull/24938/files#diff-a53c8b7022d13417a2ef33372464f9b5R1210 and ask users to run
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. It is too late now in my timezone. I will create a follow-up tomorrow.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cool, thanks! |
||
| s"${tableMetadata.identifier}, which is created by Hive and uses the " + | ||
| "following unsupported feature(s)\n" + | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we improve the exception message? Let end users know what are the new syntax for CREATE HIVE SERDE table.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. Let me create a follow-up. |
||
| tableMetadata.unsupportedFeatures.map(" - " + _).mkString("\n") | ||
| ) | ||
| } | ||
|
|
||
| if (tableMetadata.tableType == VIEW) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
| throw new AnalysisException("Hive view isn't supported by SHOW CREATE TABLE") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we just create Spark View?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This requires more change other than simple message/doc change. Is this required to be in 3.0.0 too? If so, how much time we have? |
||
| } | ||
|
|
||
| if ("true".equalsIgnoreCase(tableMetadata.properties.getOrElse("transactional", "false"))) { | ||
| throw new AnalysisException( | ||
| "SHOW CREATE TABLE doesn't support transactional Hive table") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same here. Let end users know what are the workaround, i.e., new syntax. |
||
| } | ||
|
|
||
| convertTableMetadata(tableMetadata) | ||
| } | ||
|
|
||
| val stmt = showCreateDataSourceTable(metadata) | ||
|
|
||
| Seq(Row(stmt)) | ||
| } | ||
| } | ||
|
|
||
| private def convertTableMetadata(tableMetadata: CatalogTable): CatalogTable = { | ||
| val hiveSerde = HiveSerDe( | ||
| serde = tableMetadata.storage.serde, | ||
| inputFormat = tableMetadata.storage.inputFormat, | ||
| outputFormat = tableMetadata.storage.outputFormat) | ||
|
|
||
| // Looking for Spark data source that maps to to the Hive serde. | ||
| // TODO: some Hive fileformat + row serde might be mapped to Spark data source, e.g. CSV. | ||
| val source = HiveSerDe.serdeToSource(hiveSerde) | ||
| if (source.isEmpty) { | ||
| val builder = StringBuilder.newBuilder | ||
| hiveSerde.serde.foreach { serde => | ||
| builder ++= s" SERDE: $serde" | ||
| } | ||
| hiveSerde.inputFormat.foreach { format => | ||
| builder ++= s" INPUTFORMAT: $format" | ||
| } | ||
| hiveSerde.outputFormat.foreach { format => | ||
| builder ++= s" OUTPUTFORMAT: $format" | ||
| } | ||
| throw new AnalysisException( | ||
| "Failed to execute SHOW CREATE TABLE against table " + | ||
| s"${tableMetadata.identifier}, which is created by Hive and uses the " + | ||
| "following unsupported serde configuration\n" + | ||
| builder.toString() | ||
| ) | ||
| } else { | ||
| // TODO: should we keep Hive serde properties? | ||
| val newStorage = tableMetadata.storage.copy(properties = Map.empty) | ||
| tableMetadata.copy(provider = source, storage = newStorage) | ||
| } | ||
| } | ||
|
|
||
| private def showDataSourceTableDataColumns( | ||
| metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| val columns = metadata.schema.fields.map(_.toDDL) | ||
| builder ++= concatByMultiLines(columns) | ||
| } | ||
|
|
||
| private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| // For datasource table, there is a provider there in the metadata. | ||
| // If it is a Hive table, we already convert its metadata and fill in a provider. | ||
| builder ++= s"USING ${metadata.provider.get}\n" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: it would be better to add comments or an assertion here to explain that the provider is always defined.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok. Added comments for that. |
||
|
|
||
| val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map { | ||
| case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'" | ||
| } | ||
|
|
||
| if (dataSourceOptions.nonEmpty) { | ||
| builder ++= "OPTIONS " | ||
| builder ++= concatByMultiLines(dataSourceOptions) | ||
| } | ||
| } | ||
|
|
||
| private def showDataSourceTableNonDataColumns( | ||
| metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| val partCols = metadata.partitionColumnNames | ||
| if (partCols.nonEmpty) { | ||
| builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n" | ||
| } | ||
|
|
||
| metadata.bucketSpec.foreach { spec => | ||
| if (spec.bucketColumnNames.nonEmpty) { | ||
| builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n" | ||
|
|
||
| if (spec.sortColumnNames.nonEmpty) { | ||
| builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n" | ||
| } | ||
|
|
||
| builder ++= s"INTO ${spec.numBuckets} BUCKETS\n" | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private def showCreateDataSourceTable(metadata: CatalogTable): String = { | ||
| val builder = StringBuilder.newBuilder | ||
|
|
||
| builder ++= s"CREATE TABLE ${table.quotedString} " | ||
| showDataSourceTableDataColumns(metadata, builder) | ||
| showDataSourceTableOptions(metadata, builder) | ||
| showDataSourceTableNonDataColumns(metadata, builder) | ||
| showTableComment(metadata, builder) | ||
| showTableLocation(metadata, builder) | ||
| showTableProperties(metadata, builder) | ||
|
|
||
| builder.toString() | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * This commands generates the DDL for Hive serde table. | ||
| * | ||
| * The syntax of using this command in SQL is: | ||
| * {{{ | ||
| * SHOW CREATE TABLE table_identifier AS SERDE; | ||
| * }}} | ||
| */ | ||
| case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) | ||
| extends RunnableCommand with ShowCreateTableCommandBase { | ||
| override val output: Seq[Attribute] = Seq( | ||
| AttributeReference("createtab_stmt", StringType, nullable = false)() | ||
| ) | ||
|
|
||
| override def run(sparkSession: SparkSession): Seq[Row] = { | ||
| val catalog = sparkSession.sessionState.catalog | ||
| val tableMetadata = catalog.getTableMetadata(table) | ||
|
|
||
| val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { | ||
| throw new AnalysisException( | ||
| s"$table is a Spark data source table. Use `SHOW CREATE TABLE` without `AS SERDE` instead.") | ||
| } else { | ||
| showCreateHiveTable(tableMetadata) | ||
| } | ||
|
|
||
| Seq(Row(stmt)) | ||
| } | ||
|
|
||
| private def showCreateHiveTable(metadata: CatalogTable): String = { | ||
| def reportUnsupportedError(features: Seq[String]): Unit = { | ||
| throw new AnalysisException( | ||
|
|
@@ -1055,12 +1243,12 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman | |
|
|
||
| if (metadata.tableType == VIEW) { | ||
| showViewDataColumns(metadata, builder) | ||
| showComment(metadata, builder) | ||
| showTableComment(metadata, builder) | ||
| showViewProperties(metadata, builder) | ||
| showViewText(metadata, builder) | ||
| } else { | ||
| showHiveTableHeader(metadata, builder) | ||
| showComment(metadata, builder) | ||
| showTableComment(metadata, builder) | ||
| showHiveTableNonDataColumns(metadata, builder) | ||
| showHiveTableStorageInfo(metadata, builder) | ||
| showTableLocation(metadata, builder) | ||
|
|
@@ -1084,10 +1272,6 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman | |
| } | ||
| } | ||
|
|
||
| private def concatByMultiLines(iter: Iterable[String]): String = { | ||
| iter.mkString("(\n ", ",\n ", ")\n") | ||
| } | ||
|
|
||
| private def showViewProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| val viewProps = metadata.properties.filterKeys(!_.startsWith(CatalogTable.VIEW_PREFIX)) | ||
| if (viewProps.nonEmpty) { | ||
|
|
@@ -1136,7 +1320,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman | |
| storage.serde.foreach { serde => | ||
| builder ++= s"ROW FORMAT SERDE '$serde'\n" | ||
|
|
||
| val serdeProps = metadata.storage.properties.map { | ||
| val serdeProps = SQLConf.get.redactOptions(metadata.storage.properties).map { | ||
| case (key, value) => | ||
| s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" | ||
| } | ||
|
|
@@ -1156,81 +1340,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman | |
| } | ||
| } | ||
| } | ||
|
|
||
| private def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| if (metadata.tableType == EXTERNAL) { | ||
| metadata.storage.locationUri.foreach { location => | ||
| builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n" | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private def showComment(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| metadata | ||
| .comment | ||
| .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") | ||
| .foreach(builder.append) | ||
| } | ||
|
|
||
| private def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| if (metadata.properties.nonEmpty) { | ||
| val props = metadata.properties.map { case (key, value) => | ||
| s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" | ||
| } | ||
|
|
||
| builder ++= s"TBLPROPERTIES ${concatByMultiLines(props)}" | ||
| } | ||
| } | ||
|
|
||
| private def showCreateDataSourceTable(metadata: CatalogTable): String = { | ||
| val builder = StringBuilder.newBuilder | ||
|
|
||
| builder ++= s"CREATE TABLE ${table.quotedString} " | ||
| showDataSourceTableDataColumns(metadata, builder) | ||
| showDataSourceTableOptions(metadata, builder) | ||
| showDataSourceTableNonDataColumns(metadata, builder) | ||
| showComment(metadata, builder) | ||
| showTableLocation(metadata, builder) | ||
| showTableProperties(metadata, builder) | ||
|
|
||
| builder.toString() | ||
| } | ||
|
|
||
| private def showDataSourceTableDataColumns( | ||
| metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| val columns = metadata.schema.fields.map(_.toDDL) | ||
| builder ++= concatByMultiLines(columns) | ||
| } | ||
|
|
||
| private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| builder ++= s"USING ${metadata.provider.get}\n" | ||
|
|
||
| val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map { | ||
| case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'" | ||
| } | ||
|
|
||
| if (dataSourceOptions.nonEmpty) { | ||
| builder ++= s"OPTIONS ${concatByMultiLines(dataSourceOptions)}" | ||
| } | ||
| } | ||
|
|
||
| private def showDataSourceTableNonDataColumns( | ||
| metadata: CatalogTable, builder: StringBuilder): Unit = { | ||
| val partCols = metadata.partitionColumnNames | ||
| if (partCols.nonEmpty) { | ||
| builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n" | ||
| } | ||
|
|
||
| metadata.bucketSpec.foreach { spec => | ||
| if (spec.bucketColumnNames.nonEmpty) { | ||
| builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n" | ||
|
|
||
| if (spec.sortColumnNames.nonEmpty) { | ||
| builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n" | ||
| } | ||
|
|
||
| builder ++= s"INTO ${spec.numBuckets} BUCKETS\n" | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For Hive DDL->For generating Hive DDL?