@@ -29,28 +29,31 @@ Spark SQL supports integration of Hive UDFs, UDAFs and UDTFs. Similar to Spark U
2929<pre ><code >
3030// Register a Hive UDF and use it in Spark SQL
3131// Scala
32- sql(s"CREATE TEMPORARY FUNCTION testUDF AS 'org.apache.spark.sql.hive.execution.PairUDF'")
33- sql("SELECT testUDF(pair) FROM hiveUDFTestTable")
32+ // Include the JAR file containing the mytest.hiveUDF implementation
33+ sql("CREATE TEMPORARY FUNCTION testUDF AS 'mytest.hiveUDF'")
34+ sql("SELECT testUDF(value) FROM hiveUDFTestTable")
3435
3536// Register a Hive UDAF and use it in Spark SQL
3637// Scala
38+ // Include the JAR file containing
39+ // org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax
3740sql(
3841 """
39- |CREATE TEMPORARY FUNCTION test_avg
40- |AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage '
42+ |CREATE TEMPORARY FUNCTION hive_max
43+ |AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax'
4144 """.stripMargin)
42- sql("SELECT test_avg(1), test_avg(substr(value,5)) FROM src ")
45+ sql("SELECT key % 2, hive_max(key) FROM t GROUP BY key % 2 ")
4346
4447// Register a Hive UDTF and use it in Spark SQL
4548// Scala
4649// GenericUDTFCount2 outputs the number of rows seen, twice.
4750// The function source code can be found at:
4851// https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF
49- sql(s"ADD JAR ${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath}")
52+ // Include the JAR file containing the GenericUDTFCount2 implementation
5053sql(
5154 """
52- |CREATE TEMPORARY FUNCTION udtf_count2
53- |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2'
55+ |CREATE TEMPORARY FUNCTION udtf_count2
56+ |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2'
5457 """.stripMargin)
5558sql("SELECT udtf_count2(a) FROM (SELECT 1 AS a)").show
5659
@@ -61,4 +64,4 @@ sql("SELECT udtf_count2(a) FROM (SELECT 1 AS a)").show
6164| 1|
6265+----+
6366
64- </code ></pre >
67+ </code ></pre >
0 commit comments