Move the SPARK-28445 PythonUDF test from DataFrameSuite to a new PythonUDFSuite.

viirya · viirya · commit 33a5e0d9cf34 · 2019-07-22T11:26:35.000+08:00
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2189,50 +2189,4 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
           |*(1) Range (0, 10, step=1, splits=2)""".stripMargin))
     }
   }
-
-  test("SPARK-28445: PythonUDF in grouping key and aggregate expressions") {
-    import IntegratedUDFTestUtils._
-
-    val scalaTestUDF = TestScalaUDF(name = "scalaUDF")
-    val pythonTestUDF = TestPythonUDF(name = "pyUDF")
-    assume(shouldTestPythonUDFs)
-
-    withTempView("testData") {
-      sql(
-        """CREATE OR REPLACE TEMPORARY VIEW testData AS
-          |SELECT * FROM VALUES
-          |(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
-          |AS testData(a, b)""".stripMargin)
-
-      val base = spark.table("testData")
-
-      val df = base.groupBy(scalaTestUDF(base("a") + 1))
-        .agg(scalaTestUDF(base("a") + 1), scalaTestUDF(count(base("b"))))
-      val df2 = base.groupBy(pythonTestUDF(base("a") + 1))
-        .agg(pythonTestUDF(base("a") + 1), pythonTestUDF(count(base("b"))))
-      checkAnswer(df, df2)
-
-      val df3 = base.groupBy(scalaTestUDF(base("a") + 1))
-        .agg(scalaTestUDF(base("a") + 1) + 1, scalaTestUDF(count(base("b"))))
-      val df4 = base.groupBy(pythonTestUDF(base("a") + 1))
-        .agg(pythonTestUDF(base("a") + 1) + 1, pythonTestUDF(count(base("b"))))
-      checkAnswer(df3, df4)
-
-      // PythonUDF in aggregate expression has grouping key in its arguments.
-      val df5 = base.groupBy(scalaTestUDF(base("a") + 1))
-        .agg(scalaTestUDF(scalaTestUDF(base("a") + 1)), scalaTestUDF(count(base("b"))))
-      val df6 = base.groupBy(pythonTestUDF(base("a") + 1))
-        .agg(pythonTestUDF(pythonTestUDF(base("a") + 1)), pythonTestUDF(count(base("b"))))
-      checkAnswer(df5, df6)
-
-      // PythonUDF over grouping key is argument to aggregate function.
-      val df7 = base.groupBy(scalaTestUDF(base("a") + 1))
-        .agg(scalaTestUDF(scalaTestUDF(base("a") + 1)),
-          scalaTestUDF(count(scalaTestUDF(base("a") + 1))))
-      val df8 = base.groupBy(pythonTestUDF(base("a") + 1))
-        .agg(pythonTestUDF(pythonTestUDF(base("a") + 1)),
-          pythonTestUDF(count(pythonTestUDF(base("a") + 1))))
-      checkAnswer(df7, df8)
-    }
-  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.python
+
+import org.apache.spark.sql.{IntegratedUDFTestUtils, QueryTest}
+import org.apache.spark.sql.functions.count
+import org.apache.spark.sql.test.SharedSQLContext
+
+class PythonUDFSuite extends QueryTest with SharedSQLContext {
+  import testImplicits._
+  // Each query is built with a Scala UDF and with a Python UDF; checkAnswer compares the two.
+  test("SPARK-28445: PythonUDF in grouping key and aggregate expressions") {
+    import IntegratedUDFTestUtils._
+
+    val scalaTestUDF = TestScalaUDF(name = "scalaUDF")
+    val pythonTestUDF = TestPythonUDF(name = "pyUDF")
+    assume(shouldTestPythonUDFs)
+    // Test rows include nulls (None) in column a, in column b, and in both.
+    val base = Seq(
+      (Some(1), Some(1)), (Some(1), Some(2)), (Some(2), Some(1)),
+      (Some(2), Some(2)), (Some(3), Some(1)), (Some(3), Some(2)),
+      (None, Some(1)), (Some(3), None), (None, None)).toDF("a", "b")
+    // PythonUDF used as grouping key also appears as-is in the aggregate expressions.
+    val df = base.groupBy(scalaTestUDF(base("a") + 1))
+      .agg(scalaTestUDF(base("a") + 1), scalaTestUDF(count(base("b"))))
+    val df2 = base.groupBy(pythonTestUDF(base("a") + 1))
+      .agg(pythonTestUDF(base("a") + 1), pythonTestUDF(count(base("b"))))
+    checkAnswer(df, df2)
+    // PythonUDF used as grouping key appears inside a larger expression (`+ 1`) in the aggregates.
+    val df3 = base.groupBy(scalaTestUDF(base("a") + 1))
+      .agg(scalaTestUDF(base("a") + 1) + 1, scalaTestUDF(count(base("b"))))
+    val df4 = base.groupBy(pythonTestUDF(base("a") + 1))
+      .agg(pythonTestUDF(base("a") + 1) + 1, pythonTestUDF(count(base("b"))))
+    checkAnswer(df3, df4)
+
+    // PythonUDF in aggregate expression has grouping key in its arguments.
+    val df5 = base.groupBy(scalaTestUDF(base("a") + 1))
+      .agg(scalaTestUDF(scalaTestUDF(base("a") + 1)), scalaTestUDF(count(base("b"))))
+    val df6 = base.groupBy(pythonTestUDF(base("a") + 1))
+      .agg(pythonTestUDF(pythonTestUDF(base("a") + 1)), pythonTestUDF(count(base("b"))))
+    checkAnswer(df5, df6)
+
+    // PythonUDF over grouping key is argument to aggregate function.
+    val df7 = base.groupBy(scalaTestUDF(base("a") + 1))
+      .agg(scalaTestUDF(scalaTestUDF(base("a") + 1)),
+        scalaTestUDF(count(scalaTestUDF(base("a") + 1))))
+    val df8 = base.groupBy(pythonTestUDF(base("a") + 1))
+      .agg(pythonTestUDF(pythonTestUDF(base("a") + 1)),
+        pythonTestUDF(count(pythonTestUDF(base("a") + 1))))
+    checkAnswer(df7, df8)
+  }
+}