
Commit a8dbcca

revert savamode check
1 parent 2498dfd commit a8dbcca

2 files changed: 54 additions & 49 deletions

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala

Lines changed: 3 additions & 16 deletions
@@ -412,28 +412,15 @@ case class DataSource(
     // 2. Output path must be a legal HDFS style file system path;
     // 3. It's OK that the output path doesn't exist yet;
     val allPaths = paths ++ caseInsensitiveOptions.get("path")
-    val (outputPath, pathExists) = if (allPaths.length == 1) {
+    val outputPath = if (allPaths.length == 1) {
       val path = new Path(allPaths.head)
       val fs = path.getFileSystem(sparkSession.sessionState.newHadoopConf())
-      val qualifiedOutputPath = path.makeQualified(fs.getUri, fs.getWorkingDirectory)
-      (qualifiedOutputPath, fs.exists(qualifiedOutputPath))
+      path.makeQualified(fs.getUri, fs.getWorkingDirectory)
     } else {
       throw new IllegalArgumentException("Expected exactly one path to be specified, but " +
         s"got: ${allPaths.mkString(", ")}")
     }

-    if (pathExists) {
-      if (mode == SaveMode.ErrorIfExists) {
-        throw new AnalysisException(s"path $outputPath already exists.")
-      }
-      if (mode == SaveMode.Ignore) {
-        // Since the path already exists and the save mode is Ignore, we will just return.
-        return
-      }
-    }
-
-    // if path does not exist, the ErrorIfExists and Ignore can be transformed to Append
-    val transformedMode = if (mode != SaveMode.Overwrite) SaveMode.Append else mode
     val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
     PartitioningUtils.validatePartitionColumn(data.schema, partitionColumns, caseSensitive)

@@ -464,7 +451,7 @@ case class DataSource(
         fileFormat = format,
         options = options,
         query = data.logicalPlan,
-        mode = transformedMode,
+        mode = mode,
         catalogTable = catalogTable,
         fileIndex = fileIndex)
     sparkSession.sessionState.executePlan(plan).toRdd
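
The net effect on DataSource.write is that the save mode is handed to InsertIntoHadoopFsRelationCommand untouched, instead of being checked against the output path and rewritten to Append up front. The user-visible contract remains the standard DataFrameWriter save-mode behavior; below is a minimal, self-contained sketch of that contract, where the local session, sample data, and /tmp output path are illustrative only and not part of this commit.

import org.apache.spark.sql.{SaveMode, SparkSession}

object SaveModeExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("save-mode-example")
      .getOrCreate()
    import spark.implicits._

    val df = Seq((1, "a"), (2, "b")).toDF("id", "value")
    val path = "/tmp/save-mode-example"   // illustrative output path

    // First write creates the output directory.
    df.write.mode(SaveMode.Overwrite).parquet(path)

    // With the path now existing, each mode behaves differently:
    df.write.mode(SaveMode.Append).parquet(path)      // adds new files
    df.write.mode(SaveMode.Ignore).parquet(path)      // no-op, existing data kept
    df.write.mode(SaveMode.Overwrite).parquet(path)   // replaces existing data
    // SaveMode.ErrorIfExists would throw an AnalysisException at this point.

    spark.stop()
  }
}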

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala

Lines changed: 51 additions & 33 deletions
@@ -97,45 +97,63 @@ case class InsertIntoHadoopFsRelationCommand(
         outputPath = outputPath.toString,
         isAppend = isAppend)

-    if (mode == SaveMode.Overwrite) {
-      deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations, committer)
+    val doInsertion = (mode, pathExists) match {
+      case (SaveMode.ErrorIfExists, true) =>
+        throw new AnalysisException(s"path $qualifiedOutputPath already exists.")
+      case (SaveMode.Overwrite, true) =>
+        deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations, committer)
+        true
+      case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) =>
+        true
+      case (SaveMode.Ignore, exists) =>
+        !exists
+      case (s, exists) =>
+        throw new IllegalStateException(s"unsupported save mode $s ($exists)")
     }

-    // Callback for updating metastore partition metadata after the insertion job completes.
-    def refreshPartitionsCallback(updatedPartitions: Seq[TablePartitionSpec]): Unit = {
-      if (partitionsTrackedByCatalog) {
-        val newPartitions = updatedPartitions.toSet -- initialMatchingPartitions
-        if (newPartitions.nonEmpty) {
-          AlterTableAddPartitionCommand(
-            catalogTable.get.identifier, newPartitions.toSeq.map(p => (p, None)),
-            ifNotExists = true).run(sparkSession)
-        }
-        if (mode == SaveMode.Overwrite) {
-          val deletedPartitions = initialMatchingPartitions.toSet -- updatedPartitions
-          if (deletedPartitions.nonEmpty) {
-            AlterTableDropPartitionCommand(
-              catalogTable.get.identifier, deletedPartitions.toSeq,
-              ifExists = true, purge = false,
-              retainData = true /* already deleted */).run(sparkSession)
+    if (doInsertion) {
+
+      // Callback for updating metastore partition metadata after the insertion job completes.
+      def refreshPartitionsCallback(updatedPartitions: Seq[TablePartitionSpec]): Unit = {
+        if (partitionsTrackedByCatalog) {
+          val newPartitions = updatedPartitions.toSet -- initialMatchingPartitions
+          if (newPartitions.nonEmpty) {
+            AlterTableAddPartitionCommand(
+              catalogTable.get.identifier, newPartitions.toSeq.map(p => (p, None)),
+              ifNotExists = true).run(sparkSession)
+          }
+          if (mode == SaveMode.Overwrite) {
+            val deletedPartitions = initialMatchingPartitions.toSet -- updatedPartitions
+            if (deletedPartitions.nonEmpty) {
+              AlterTableDropPartitionCommand(
+                catalogTable.get.identifier, deletedPartitions.toSeq,
+                ifExists = true, purge = false,
+                retainData = true /* already deleted */).run(sparkSession)
+            }
           }
         }
       }
-    }

-    FileFormatWriter.write(
-      sparkSession = sparkSession,
-      queryExecution = Dataset.ofRows(sparkSession, query).queryExecution,
-      fileFormat = fileFormat,
-      committer = committer,
-      outputSpec = FileFormatWriter.OutputSpec(
-        qualifiedOutputPath.toString, customPartitionLocations),
-      hadoopConf = hadoopConf,
-      partitionColumns = partitionColumns,
-      bucketSpec = bucketSpec,
-      refreshFunction = refreshPartitionsCallback,
-      options = options)
-
-    fileIndex.foreach(_.refresh())
+      FileFormatWriter.write(
+        sparkSession = sparkSession,
+        queryExecution = Dataset.ofRows(sparkSession, query).queryExecution,
+        fileFormat = fileFormat,
+        committer = committer,
+        outputSpec = FileFormatWriter.OutputSpec(
+          qualifiedOutputPath.toString, customPartitionLocations),
+        hadoopConf = hadoopConf,
+        partitionColumns = partitionColumns,
+        bucketSpec = bucketSpec,
+        refreshFunction = refreshPartitionsCallback,
+        options = options)
+
+      // refresh cached files in FileIndex
+      fileIndex.foreach(_.refresh())
+      // refresh data cache if table is cached
+      sparkSession.catalog.refreshByPath(outputPath.toString)
+    } else {
+      logInfo("Skipping insertion into a relation that already exists.")
+    }

     Seq.empty[Row]
   }
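
Taken together, the restored block is a small decision table over (SaveMode, pathExists), evaluated where the write actually runs rather than in DataSource. A self-contained sketch of that table follows; SaveModeDecision and shouldInsert are hypothetical names for illustration, and the real command throws AnalysisException and also deletes matching partitions on Overwrite before writing.

import org.apache.spark.sql.SaveMode

object SaveModeDecision {
  // Mirror of the (mode, pathExists) decision: returns true when the write
  // should proceed, false when it should be silently skipped, and throws
  // when the path must not already exist.
  def shouldInsert(mode: SaveMode, pathExists: Boolean): Boolean = (mode, pathExists) match {
    case (SaveMode.ErrorIfExists, true) =>
      throw new IllegalArgumentException("path already exists")
    case (SaveMode.Overwrite, true) =>
      true   // the command first deletes matching partitions, then writes
    case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) =>
      true
    case (SaveMode.Ignore, exists) =>
      !exists
  }

  def main(args: Array[String]): Unit = {
    for {
      mode <- Seq(SaveMode.Append, SaveMode.Overwrite, SaveMode.Ignore)
      exists <- Seq(false, true)
    } println(s"mode=$mode, pathExists=$exists -> insert=${shouldInsert(mode, exists)}")
  }
}

With the check living here, ErrorIfExists and Ignore are no longer rewritten to Append before the command runs, and Ignore on an existing path produces a logged no-op instead of an early return in DataSource.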
