diff --git a/README.md b/README.md index 3f2794b9..f54416f0 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ We have opened a Spark Project Improvement Proposal: [Kotlin support for Apache - [Overload Resolution Ambiguity](#overload-resolution-ambiguity) - [Tuples](#tuples) - [Streaming](#streaming) + - [User Defined Functions](#user-defined-functions) - [Examples](#examples) - [Reporting issues/Support](#reporting-issuessupport) - [Code of Conduct](#code-of-conduct) @@ -275,6 +276,48 @@ withSparkStreaming(batchDuration = Durations.seconds(1), timeout = 10_000) { // For more information, check the [wiki](https://github.com/JetBrains/kotlin-spark-api/wiki/Streaming). +### User Defined Functions + +Spark has a way to call functions from SQL using so-called [UDFs](https://spark.apache.org/docs/latest/sql-ref-functions-udf-scalar.html). +Using the Scala/Java API from Kotlin is not straightforward, so we decided to add special UDF support for Kotlin. +This support grew into a typesafe, name-safe, and feature-rich solution for which we will give an example: +```kotlin +// example of creation/naming, and registering of a simple UDF +val plusOne by udf { x: Int -> x + 1 } +plusOne.register() +spark.sql("SELECT plusOne(5)").show() +// +----------+ +// |plusOne(5)| +// +----------+ +// | 6| +// +----------+ + +// directly registering +udf.register("plusTwo") { x: Double -> x + 2.0 } +spark.sql("SELECT plusTwo(2.0d)").show() +// +------------+ +// |plusTwo(2.0)| +// +------------+ +// | 4.0| +// +------------+ + +// dataset select +val result: Dataset<Int> = myDs.select( + plusOne(col(MyType::age)) +) +``` + +We support: + - a notation close to Spark's + - smart naming (with reflection) + - creation from function references + - typed column operations + - UDAF support and functional creation + - (Unique!) simple vararg UDF support + +For more, check the [extensive examples](examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/UDFs.kt). 
+Also, check out the [wiki](https://github.com/Kotlin/kotlin-spark-api/wiki/UDF). + ## Examples For more, check out [examples](examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples) module. diff --git a/core/3.2/src/main/scala/org/jetbrains/kotlinx/spark/extensions/VarargUnwrapper.scala b/core/3.2/src/main/scala/org/jetbrains/kotlinx/spark/extensions/VarargUnwrapper.scala new file mode 100644 index 00000000..27f317a4 --- /dev/null +++ b/core/3.2/src/main/scala/org/jetbrains/kotlinx/spark/extensions/VarargUnwrapper.scala @@ -0,0 +1,93 @@ +package org.jetbrains.kotlinx.spark.extensions + +import org.apache.spark.sql.api.java.{UDF1, UDF2} + +/** + * Allows any simple vararg function reference to be treated as 23 different Scala functions. + * Used to make vararg UDFs for `ScalaUDF`. + * + * @param varargFunc + * @param newArray + * @tparam T + * @tparam Array + * @tparam R + */ +class VarargUnwrapper[T, Array, R]( + val varargFunc: UDF1[Array, R], + val newArray: UDF2[Integer, UDF1[Integer, T], Array], +) extends Serializable + with Function0[R] + with Function1[T, R] + with Function2[T, T, R] + with Function3[T, T, T, R] + with Function4[T, T, T, T, R] + with Function5[T, T, T, T, T, R] + with Function6[T, T, T, T, T, T, R] + with Function7[T, T, T, T, T, T, T, R] + with Function8[T, T, T, T, T, T, T, T, R] + with Function9[T, T, T, T, T, T, T, T, T, R] + with Function10[T, T, T, T, T, T, T, T, T, T, R] + with Function11[T, T, T, T, T, T, T, T, T, T, T, R] + with Function12[T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function13[T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function14[T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function15[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function16[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function17[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function18[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function19[T, T, T, T, T, T, T, T, T, T, T, 
T, T, T, T, T, T, T, T, R] + with Function20[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function21[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] + with Function22[T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, R] { + + private def vararg(t: T*): R = varargFunc.call(newArray.call(t.size, { t(_) })) + + override def curried: Nothing = throw new UnsupportedOperationException() + override def tupled: Nothing = throw new UnsupportedOperationException() + + override def apply(): R = vararg() + + override def apply(v0: T): R = vararg(v0) + + override def apply(v0: T, v1: T): R = vararg(v0, v1) + + override def apply(v0: T, v1: T, v2: T): R = vararg(v0, v1, v2) + + override def apply(v0: T, v1: T, v2: T, v3: T): R = vararg(v0, v1, v2, v3) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T): R = vararg(v0, v1, v2, v3, v4) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T): R = vararg(v0, v1, v2, v3, v4, v5) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T): R = vararg(v0, v1, v2, v3, v4, v5, v6) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, 
v7, v8, v9, v10, v11, v12) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T, v19: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) + + override def apply(v0: T, v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T, v19: T, v20: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) + + override def apply(v0: T, 
v1: T, v2: T, v3: T, v4: T, v5: T, v6: T, v7: T, v8: T, v9: T, v10: T, v11: T, v12: T, v13: T, v14: T, v15: T, v16: T, v17: T, v18: T, v19: T, v20: T, v21: T): R = vararg(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) +} diff --git a/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt b/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt index fc0a2888..cf0627c5 100644 --- a/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt +++ b/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/Main.kt @@ -61,7 +61,7 @@ object Main { triples .leftJoin( right = pairs, - col = triples("_1").multiply(2) eq pairs("_1"), + col = triples.col("_1").multiply(2) eq pairs.col("_1"), ) // .also { it.printSchema() } .map { (triple, pair) -> Five(triple._1, triple._2, triple._3, pair?._1, pair?._2) } diff --git a/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/UDFs.kt b/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/UDFs.kt new file mode 100644 index 00000000..ac96dd44 --- /dev/null +++ b/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/UDFs.kt @@ -0,0 +1,450 @@ +/*- + * =LICENSE= + * Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * =LICENSEEND= + */ +package org.jetbrains.kotlinx.spark.examples + +import org.apache.spark.sql.* +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.functions.* +import org.jetbrains.kotlinx.spark.api.* +import org.jetbrains.kotlinx.spark.api.tuples.* +import scala.Tuple2 +import scala.collection.mutable.WrappedArray + + +fun main() { + sparkExample() + smartNames() + functionToUDF() + strongTypingInDatasets() + UDAF() + varargUDFs() +} + + +/** + * https://spark.apache.org/docs/latest/sql-ref-functions-udf-scalar.html + * adapted directly for Kotlin: + * */ +private fun sparkExample(): Unit = withSpark { + + // Define and register a zero-argument non-deterministic UDF + // UDF is deterministic by default, i.e. produces the same result for the same input. + val random = udf(nondeterministic = true) { -> Math.random() } + udf.register("random", random) + spark.sql("SELECT random()").show() + // +--------+ + // |random()| + // +--------+ + // |xxxxxxxx| + // +--------+ + + // Define and register a one-argument UDF + val plusOne = udf { x: Int -> x + 1 } + udf.register("plusOne", plusOne) + spark.sql("SELECT plusOne(5)").show() + // +----------+ + // |plusOne(5)| + // +----------+ + // | 6| + // +----------+ + + // Define a two-argument UDF and register it with Spark in one step + udf.register("strLenKotlin") { str: String, int: Int -> str.length + int } + spark.sql("SELECT strLenKotlin('test', 1)").show() + // +---------------------+ + // |strLenKotlin(test, 1)| + // +---------------------+ + // | 5| + // +---------------------+ + + // UDF in a WHERE clause + udf.register("oneArgFilter") { n: Long -> n > 5 } + spark.range(1, 10).createOrReplaceTempView("test") + spark.sql("SELECT * FROM test WHERE oneArgFilter(id)").show() + // +---+ + // | id| + // +---+ + // | 6| + // | 7| + // | 8| + // | 9| + // +---+ +} + +/** + * Shows how Kotlin's UDF wrappers can carry a name which saves you time and errors. 
+ */ +private fun smartNames(): Unit = withSpark { + + // remember the plusOne function from sparkExample? + val plusOne = udf { x: Int -> x + 1 } + udf.register("plusOne", plusOne) + spark.sql("SELECT plusOne(5)").show() + + + // As you can see, there are just too many places where "plusOne" is written and we developers are lazy + // So, the Kotlin spark api introduces NamedUserDefinedFunctions! + // The register call will no longer require a name and the name of the named udf will simply be used + val plusOneNamed = udf("plusOneNamed") { x: Int -> x + 1 } + udf.register(plusOneNamed) + spark.sql("SELECT plusOneNamed(5)").show() + + + // You can still supply a name at any moment to replace it + udf.register("plusOneNamed1", plusOneNamed) + udf.register(plusOneNamed.withName("plusOneNamed2")) + + + // Finally, we can even use some Kotlin reflection magic to achieve the following + // (myUdf.register() does the same as udf.register(), just a tiny bit shorter) + val plusOneFinal by udf { x: Int -> x + 1 } + plusOneFinal.register() + spark.sql("SELECT plusOneFinal(5)").show() +// +---------------+ +// |plusOneFinal(5)| +// +---------------+ +// | 6| +// +---------------+ + +} + + +/** + * Shows how UDFs can be created from normal functions as well. + */ +private fun functionToUDF(): Unit = withSpark { + + // Say we want to convert a normal readable function to a UDF + fun plusOne(x: Int) = x + 1 + + // We can use reflection for that! And as you can see, we get a named udf as well + val plusOneUDF: NamedUserDefinedFunction1 = udf(::plusOne) + + // This means we can even create and register this udf without any name explicitly supplied + // in a single line! 
+ udf.register(::plusOne) + spark.sql("SELECT plusOne(5)").show() +// +----------+ +// |plusOne(5)| +// +----------+ +// | 6| +// +----------+ + + // It also works for functions as lambda values: + val minusOneUDF: NamedUserDefinedFunction1 = udf(::minusOne) + + // And as usual, you can define a new name if you like: + udf("newName", ::minusOne) +} + +private val minusOne = { x: Int -> x - 1 } + +/** + * Shows how UDFs in Kotlin carry typing information, which allows you to do + * typesafe column operations with them. + */ +private fun strongTypingInDatasets() = withSpark { + data class User(val name: String, val age: Int?) + val ds: Dataset = dsOf( + User("A", null), + User("B", 23), + User("C", 60), + User("D", 14), + ).showDS() +// +----+----+ +// |name| age| +// +----+----+ +// | A|null| +// | B| 23| +// | C| 60| +// | D| 14| +// +----+----+ + + + // UDFs can also be used, no registering needed, to perform operations on columns + // using the Dataset API. UDFs are not as optimized as other Spark functions in terms of + // raw performance, however, in return you get infinitely more versatility. + // UDFs are usually executed using the [apply] method present in them, but, + // of course, we had to Kotlin-ify those too, which means you can do: + val replaceMissingAge = udf { age: Int?, value: Int -> age ?: value } + + val result1: Dataset> = ds.select( + col(User::name), replaceMissingAge(col(User::age), typedLit(-1)) + ).showDS() +// +----+------------+ +// |name|UDF(age, -1)| +// +----+------------+ +// | A| -1| +// | B| 23| +// | C| 60| +// | D| 14| +// +----+------------+ + + // As you can see, the resulting dataset type is Tuple2 + // This is possible since we know what types go in and out of the replaceMissingAge udf. + // We can thus provide TypedColumns instead of normal ones which the select function takes + // advantage of! 
+ + // NOTE: In UDFs, iterables, lists, arrays and such need to be represented as WrappedArray + val toJson by udf { age: Int, name: String, pets: WrappedArray -> + """{ "age" : $age, "name" : "$name", "pets" : [${pets.asKotlinIterable().joinToString { "\"$it\"" }}] }""" + } + + // Also when you are using Dataframes (untyped Datasets), you can still provide type hints for columns manually + // if you want to receive type hints after calling the UDF + val df: Dataset = dfOf( + colNames = arrayOf("name", "age", "pets"), + t("Alice", 12, emptyList()), + t("Bob", 24, listOf("Dog", "Cat")), + t("Charlie", 18, listOf("Bunny")), + ).showDS() +// +-------+---+----------+ +// | name|age| pets| +// +-------+---+----------+ +// | Alice| 12| []| +// | Bob| 24|[Dog, Cat]| +// |Charlie| 18| [Bunny]| +// +-------+---+----------+ + + val result2 = df.select( + toJson( + col<_, Int>("age"), + col<_, String>("name"), + col>("pets").asSeq(), +// or `col<_, WrappedArray>("pets")` if you want to be less strict + ) + ).showDS(truncate = false) +// +-------------------------------------------------------+ +// |toJson(age, name, pets) | +// +-------------------------------------------------------+ +// |{ "age" : 12, "name" : "Alice", "pets" : [] } | +// |{ "age" : 24, "name" : "Bob", "pets" : ["Dog", "Cat"] }| +// |{ "age" : 18, "name" : "Charlie", "pets" : ["Bunny"] } | +// +-------------------------------------------------------+ +} + +data class Employee(val name: String, val salary: Long) +data class Average(var sum: Long, var count: Long) + +private object MyAverage : Aggregator() { + // A zero value for this aggregation. Should satisfy the property that any b + zero = b + + override fun zero(): Average = Average(0L, 0L) + + // Combine two values to produce a new value. 
For performance, the function may modify `buffer` + // and return it instead of constructing a new object + override fun reduce(buffer: Average, employee: Employee): Average { + buffer.sum += employee.salary + buffer.count += 1L + return buffer + } + + // Merge two intermediate values + override fun merge(b1: Average, b2: Average): Average { + b1.sum += b2.sum + b1.count += b2.count + return b1 + } + + // Transform the output of the reduction + override fun finish(reduction: Average): Double = reduction.sum.toDouble() / reduction.count + + // Specifies the Encoder for the intermediate value type + override fun bufferEncoder(): Encoder = encoder() + + // Specifies the Encoder for the final output value type + override fun outputEncoder(): Encoder = encoder() + +} + +/** + * Shows how UDAFs can be used from Kotlin. + */ +private fun UDAF() = withSpark { + // First let's go over the example from Spark for User defined aggregate functions: + // https://spark.apache.org/docs/latest/sql-ref-functions-udf-aggregate.html + // See above for Employee, Average, and MyAverage + + val ds: Dataset = dsOf( + Employee("Michael", 3000), + Employee("Andy", 4500), + Employee("Justin", 3500), + Employee("Berta", 4000), + ).showDS() +// +-------+------+ +// | name|salary| +// +-------+------+ +// |Michael| 3000| +// | Andy| 4500| +// | Justin| 3500| +// | Berta| 4000| +// +-------+------+ + + // Convert the function to a `TypedColumn` and give it a name + val averageSalary: TypedColumn = MyAverage.toColumn().name("average_salary") + val result1: Dataset = ds.select(averageSalary) + .showDS() +// +--------------+ +// |average_salary| +// +--------------+ +// | 3750.0| +// +--------------+ + + // While this method can work on all columns of a Dataset, if we want to be able + // to select the columns specifically, we need to convert MyAverage to a UDAF + // Let's first create a new one with Long as input: + val myAverage = aggregatorOf( + zero = { Average(0L, 0L) }, + reduce = { buffer, 
it -> + buffer.sum += it + buffer.count += 1 + buffer + }, + merge = { buffer, it -> + buffer.sum += it.sum + buffer.count += it.count + buffer + }, + finish = { it.sum.toDouble() / it.count }, + ) + + // Now we need to define a name, otherwise it will default to "Aggregator", since that's + // the name of the class `aggregatorOf` will implement and return. + // We can register it again for SQL or call it directly in Dataset select + val myAverageUdf = udaf("myAverage", myAverage).register() + + ds.createOrReplaceTempView("employees") + spark.sql("""SELECT myAverage(salary) as average_salary from employees""") + .showDS() +// +--------------+ +// |average_salary| +// +--------------+ +// | 3750.0| +// +--------------+ + + val result2: Dataset = ds.select( + myAverageUdf( + col(Employee::salary) + ).name("average_salary") + ).showDS() +// +--------------+ +// |average_salary| +// +--------------+ +// | 3750.0| +// +--------------+ + + // Finally, if you don't need an aggregator directly but just a udaf, you can use something like this: + val udaf: UserDefinedFunction1 = udaf( + zero = { Average(0L, 0L) }, + reduce = { buffer, it -> + buffer.sum += it + buffer.count += 1 + buffer + }, + merge = { buffer, it -> + buffer.sum += it.sum + buffer.count += it.count + buffer + }, + finish = { it.sum.toDouble() / it.count }, + ) + + // Or you can even register it right away (note a name is required) + val registeredUdaf: NamedUserDefinedFunction1 = udf.register( + name = "average", + zero = { Average(0L, 0L) }, + reduce = { buffer, it -> + buffer.sum += it + buffer.count += 1 + buffer + }, + merge = { buffer, it -> + buffer.sum += it.sum + buffer.count += it.count + buffer + }, + finish = { it.sum.toDouble() / it.count }, + ) +} + +/** + * Shows the new and unique vararg UDFs the Kotlin Spark API has to offer and how to use them. 
+ */ +private fun varargUDFs() = withSpark { + // Finally, let's go over something unique to the Kotlin version of the Spark API: Simple Vararg UDFs + + // Wouldn't it be nice to convert a function like this into a UDF you can call with any number of columns? + fun sumOf(vararg double: Double): Double = double.sum() + // Well, why don't we try ;) + + // As you can see, we get a `NamedUserDefinedFunctionVararg` + val sumUDF = udf.register(::sumOf) + + data class Values(val v1: Double, val v2: Double, val v3: Double, val v4: Double) + val ds = dsOf( + Values(1.0, 2.0, 3.0, 4.0), + Values(4.0, 3.0, 2.0, 1.0), + Values(1.0, 1.0, 1.0, 1.0), + ).showDS() +// +---+---+---+---+ +// | v1| v2| v3| v4| +// +---+---+---+---+ +// |1.0|2.0|3.0|4.0| +// |4.0|3.0|2.0|1.0| +// |1.0|1.0|1.0|1.0| +// +---+---+---+---+ + + ds.createOrReplaceTempView("values") + spark.sql("""SELECT sumOf(v1, v4), sumOf(), sumOf(v1, v2, v3, v4) FROM values""") + .showDS() +// +-------------+-------+---------------------+ +// |sumOf(v1, v4)|sumOf()|sumOf(v1, v2, v3, v4)| +// +-------------+-------+---------------------+ +// | 5.0| 0.0| 10.0| +// | 5.0| 0.0| 10.0| +// | 2.0| 0.0| 4.0| +// +-------------+-------+---------------------+ + + val result = ds.select( + sumUDF(col(Values::v1), col(Values::v4)), + sumUDF(), + sumUDF(col(Values::v1), col(Values::v2), col(Values::v3), col(Values::v4)), + ).showDS() +// +-------------+-------+---------------------+ +// |sumOf(v1, v4)|sumOf()|sumOf(v1, v2, v3, v4)| +// +-------------+-------+---------------------+ +// | 5.0| 0.0| 10.0| +// | 5.0| 0.0| 10.0| +// | 2.0| 0.0| 4.0| +// +-------------+-------+---------------------+ + + + // As you can see, it just works :), up to 22 parameters! 
+ // In fact, since UDFs don't support arrays (only scala's WrappedArray), any udf that contains just an array + // as parameter will become a vararg udf: + udf.register("joinToString") { strings: Array -> strings.joinToString(separator = "-") } + spark.sql("""SELECT joinToString("a", "hi there", "test"), joinToString(), joinToString("b", "c")""") + .showDS() +// +-------------------------------+--------------+------------------+ +// |joinToString(a, hi there, test)|joinToString()|joinToString(b, c)| +// +-------------------------------+--------------+------------------+ +// | a-hi there-test| | b-c| +// +-------------------------------+--------------+------------------+ +} diff --git a/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/WordCount.kt b/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/WordCount.kt index c08a9df5..30c5c340 100644 --- a/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/WordCount.kt +++ b/examples/src/main/kotlin/org/jetbrains/kotlinx/spark/examples/WordCount.kt @@ -35,7 +35,7 @@ fun main() { .cleanup() .groupByKey { it } .mapGroups { k, iter -> k X iter.asSequence().count() } - .sort { arrayOf(it(colName = "_2").desc()) } + .sort { arrayOf(it.col("_2").desc()) } .limit(20) .map { it.swap() } .show(false) diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt index bafbfcdf..3752fdf9 100644 --- a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt +++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt @@ -65,6 +65,8 @@ internal class SparkIntegration : Integration() { inline fun dsOf(vararg arg: T): Dataset = spark.dsOf(*arg)""".trimIndent(), """ inline fun dfOf(vararg arg: T): Dataset = spark.dfOf(*arg)""".trimIndent(), + """ + inline fun emptyDataset(): Dataset = 
spark.emptyDataset(encoder())""".trimIndent(), """ inline fun dfOf(colNames: Array, vararg arg: T): Dataset = spark.dfOf(colNames, *arg)""".trimIndent(), """ diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt index 72e8a9b7..bb0d6d20 100644 --- a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt @@ -31,12 +31,38 @@ import org.apache.spark.sql.TypedColumn import org.apache.spark.sql.functions import kotlin.reflect.KProperty1 +/** + * Selects column based on the column name and returns it as a [TypedColumn]. + * + * For example: + * ```kotlin + * dataset.col<_, Int>("a") + * ``` + * + * @note The column name can also reference to a nested column like `a.b`. + */ +inline fun Dataset.col(colName: String): TypedColumn = + org.jetbrains.kotlinx.spark.api.col(colName) + +/** + * Selects column based on the column name and returns it as a [TypedColumn]. + * + * For example: + * ```kotlin + * dataset<_, Int>("a") + * ``` + * @note The column name can also reference to a nested column like `a.b`. + */ +inline operator fun Dataset.invoke(colName: String): TypedColumn = + org.jetbrains.kotlinx.spark.api.col(colName) + /** * Selects column based on the column name and returns it as a [Column]. * * @note The column name can also reference to a nested column like `a.b`. + * */ -operator fun Dataset.invoke(colName: String): Column = col(colName) +operator fun Dataset<*>.invoke(colName: String): Column = apply(colName) /** * Helper function to quickly get a [TypedColumn] (or [Column]) from a dataset in a refactor-safe manner. 
@@ -47,8 +73,8 @@ operator fun Dataset.invoke(colName: String): Column = col(colName) * @see invoke */ @Suppress("UNCHECKED_CAST") -inline fun Dataset.col(column: KProperty1): TypedColumn = - col(column.name).`as`() as TypedColumn +inline fun Dataset.col(column: KProperty1): TypedColumn = + col(column.name).`as`() /** @@ -59,9 +85,18 @@ inline fun Dataset.col(column: KProperty1): Type * ``` * @see col */ -inline operator fun Dataset.invoke(column: KProperty1): TypedColumn = col(column) +inline operator fun Dataset.invoke(column: KProperty1): TypedColumn = col(column) +/** + * Can be used to create a [TypedColumn] for a simple [Dataset] + * with just one single column called "value". + */ +inline fun Dataset.singleCol(colName: String = "value"): TypedColumn { + require(schema().fields().size == 1) { "This Dataset<${T::class.simpleName}> contains more than 1 column" } + return org.jetbrains.kotlinx.spark.api.singleCol(colName) +} + @Suppress("FunctionName") @Deprecated( message = "Changed to \"`===`\" to better reflect Scala API.", @@ -402,11 +437,60 @@ operator fun Column.get(key: Any): Column = getItem(key) * * ``` * val df: Dataset = ... - * val typedColumn: Dataset = df.selectTyped( col("a").`as`() ) + * val typedColumn: Dataset = df.select( col("a").`as`<_, Int>() ) + * ``` + * + * @see typed + */ +@Suppress("UNCHECKED_CAST") +inline fun Column.`as`(): TypedColumn = `as`(encoder()) as TypedColumn + +/** + * Provides a type hint about the expected return value of this column. This information can + * be used by operations such as `select` on a [Dataset] to automatically convert the + * results into the correct JVM types. + * + * ``` + * val df: Dataset = ... + * val typedColumn: Dataset = df.select( col("a").`as`<_, Int>() ) + * ``` + * + * @see typed + */ +@Suppress("UNCHECKED_CAST") +inline fun TypedColumn.`as`(): TypedColumn = `as`(encoder()) as TypedColumn + +/** + * Provides a type hint about the expected return value of this column. 
This information can + * be used by operations such as `select` on a [Dataset] to automatically convert the + * results into the correct JVM types. + * + * ``` + * val df: Dataset = ... + * val typedColumn: Dataset = df.select( col("a").typed<_, Int>() ) * ``` + * + * @see as */ + @Suppress("UNCHECKED_CAST") -inline fun Column.`as`(): TypedColumn = `as`(encoder()) +inline fun Column.typed(): TypedColumn = `as`() + +/** + * Provides a type hint about the expected return value of this column. This information can + * be used by operations such as `select` on a [Dataset] to automatically convert the + * results into the correct JVM types. + * + * ``` + * val df: Dataset = ... + * val typedColumn: Dataset = df.select( col("a").typed<_, Int>() ) + * ``` + * + * @see as + */ +@Suppress("UNCHECKED_CAST") +inline fun TypedColumn.typed(): TypedColumn = `as`() + /** * Creates a [Column] of literal value. @@ -420,13 +504,49 @@ inline fun Column.`as`(): TypedColumn = `as`(encoder()) */ fun lit(a: Any): Column = functions.lit(a) +/** + * Creates a [Column] of literal value. + * + * The passed in object is returned directly if it is already a [Column]. + * If the object is a Scala Symbol, it is converted into a [Column] also. + * Otherwise, a new [Column] is created to represent the literal value. + * The difference between this function and [lit] is that this function + * can handle types and parameterized scala types e.g.: List, Seq and Map. + * + */ +inline fun typedLit(literal: U): TypedColumn = functions.lit(literal).typed() + +/** + * Returns a [TypedColumn] based on the given column name and type [DsType]. + * + * This is just a shortcut to the function from [org.apache.spark.sql.functions] combined with an [as] call. + * For all the functions, simply add `import org.apache.spark.sql.functions.*` to your file. 
+ * + * @see col + * @see as + */ +inline fun col(colName: String): TypedColumn = functions.col(colName).`as`() + +/** + * Can be used to create a [TypedColumn] for a simple [Dataset] + * with just one single column called "value". + */ +inline fun singleCol(colName: String = "value"): TypedColumn = functions.col(colName).`as`() + +/** + * Returns a [Column] based on the given column name. + * + */ +fun col(colName: String): Column = functions.col(colName) + /** * Returns a [Column] based on the given class attribute, not connected to a dataset. * ```kotlin * val dataset: Dataset = ... - * val new: Dataset> = dataset.select( col(YourClass::a), col(YourClass::b) ) + * val new: Dataset> = dataset.select( col(YourClass::a), col(YourClass::b) ) * ``` + * @see col */ @Suppress("UNCHECKED_CAST") -inline fun col(column: KProperty1): TypedColumn = - functions.col(column.name).`as`() as TypedColumn +inline fun col(column: KProperty1): TypedColumn = + functions.col(column.name).`as`() diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt index a740efad..94971f43 100644 --- a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkSession.kt @@ -72,6 +72,9 @@ class KSparkSession(val spark: SparkSession) { /** Utility method to create dataset from vararg arguments. */ inline fun dsOf(vararg arg: T): Dataset = spark.dsOf(*arg) + /** Creates new empty dataset of type [T]. */ + inline fun emptyDataset(): Dataset = spark.emptyDataset(encoder()) + /** Utility method to create dataframe from *array or vararg arguments */ inline fun dfOf(vararg arg: T): Dataset = spark.dfOf(*arg) @@ -109,6 +112,14 @@ class KSparkSession(val spark: SparkSession) { * it is present in the query. 
*/ val udf: UDFRegistration get() = spark.udf() + + inline fun > + NAMED_UDF.register(): NAMED_UDF = + this@KSparkSession.udf.register(namedUdf = this) + + inline fun > + UserDefinedFunction.register(name: String): NAMED_UDF = + this@KSparkSession.udf.register(name = name, udf = this) } /** diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/StreamingKeyValues.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/StreamingKeyValues.kt index f9044b5b..692bb4c5 100644 --- a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/StreamingKeyValues.kt +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/StreamingKeyValues.kt @@ -247,7 +247,7 @@ fun JavaDStream>.reduceByKeyAndWindow( /* slideDuration = */ slideDuration, /* numPartitions = */ numPartitions, /* filterFunc = */ filterFunc?.let { - { tuple -> + { tuple: Tuple2 -> filterFunc(tuple) } } @@ -289,7 +289,7 @@ fun JavaDStream>.reduceByKeyAndWindow( /* slideDuration = */ slideDuration, /* partitioner = */ partitioner, /* filterFunc = */ filterFunc?.let { - { tuple -> + { tuple: Tuple2 -> filterFunc(tuple) } } diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt index 05d72675..ccbdc01b 100644 --- a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt @@ -27,57 +27,17 @@ package org.jetbrains.kotlinx.spark.api import org.apache.spark.sql.Column -import org.apache.spark.sql.DataTypeWithClass import org.apache.spark.sql.UDFRegistration import org.apache.spark.sql.api.java.* import org.apache.spark.sql.functions -import org.apache.spark.sql.types.DataType -import scala.collection.mutable.WrappedArray -import kotlin.reflect.KClass -import kotlin.reflect.full.isSubclassOf import 
kotlin.reflect.typeOf -/** Unwraps [DataTypeWithClass]. */ -fun DataType.unWrap(): DataType = - when (this) { - is DataTypeWithClass -> DataType.fromJson(dt().json()) - else -> this - } - -/** - * Checks if [this] is of a valid type for a UDF, otherwise it throws a [TypeOfUDFParameterNotSupportedException] - */ -@PublishedApi -internal fun KClass<*>.checkForValidType(parameterName: String) { - if (this == String::class || isSubclassOf(WrappedArray::class)) - return // Most of the time we need strings or WrappedArrays - - if (isSubclassOf(Iterable::class) - || java.isArray - || isSubclassOf(Map::class) - || isSubclassOf(Array::class) - || isSubclassOf(ByteArray::class) - || isSubclassOf(CharArray::class) - || isSubclassOf(ShortArray::class) - || isSubclassOf(IntArray::class) - || isSubclassOf(LongArray::class) - || isSubclassOf(FloatArray::class) - || isSubclassOf(DoubleArray::class) - || isSubclassOf(BooleanArray::class) - ) throw TypeOfUDFParameterNotSupportedException(this, parameterName) -} - -/** - * An exception thrown when the UDF is generated with illegal types for the parameters - */ -class TypeOfUDFParameterNotSupportedException(kClass: KClass<*>, parameterName: String) : IllegalArgumentException( - "Parameter $parameterName is subclass of ${kClass.qualifiedName}. If you need to process an array use ${WrappedArray::class.qualifiedName}." -) /** * A wrapper for a UDF with 0 arguments. * @property udfName the name of the UDF */ +@Deprecated("Use new UDF notation", replaceWith = ReplaceWith("UserDefinedFunction0")) class UDFWrapper0(private val udfName: String) { /** * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. @@ -91,6 +51,7 @@ class UDFWrapper0(private val udfName: String) { * Registers the [func] with its [name] in [this]. 
*/ @OptIn(ExperimentalStdlibApi::class) +@Deprecated("Use new UDF notation", ReplaceWith("this.register(name, func)"), DeprecationLevel.HIDDEN) inline fun UDFRegistration.register(name: String, noinline func: () -> R): UDFWrapper0 { register(name, UDF0(func), schema(typeOf()).unWrap()) return UDFWrapper0(name) @@ -100,6 +61,7 @@ inline fun UDFRegistration.register(name: String, noinline func: () * A wrapper for a UDF with 1 arguments. * @property udfName the name of the UDF */ +@Deprecated("Use new UDF notation", replaceWith = ReplaceWith("UserDefinedFunction1")) class UDFWrapper1(private val udfName: String) { /** * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. @@ -113,6 +75,7 @@ class UDFWrapper1(private val udfName: String) { * Registers the [func] with its [name] in [this]. */ @OptIn(ExperimentalStdlibApi::class) +@Deprecated("Use new UDF notation", ReplaceWith("this.register(name, func)"), DeprecationLevel.HIDDEN) inline fun UDFRegistration.register(name: String, noinline func: (T0) -> R): UDFWrapper1 { T0::class.checkForValidType("T0") register(name, UDF1(func), schema(typeOf()).unWrap()) @@ -123,6 +86,7 @@ inline fun UDFRegistration.register(name: String, noinli * A wrapper for a UDF with 2 arguments. * @property udfName the name of the UDF */ +@Deprecated("Use new UDF notation", replaceWith = ReplaceWith("UserDefinedFunction2")) class UDFWrapper2(private val udfName: String) { /** * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. @@ -136,6 +100,7 @@ class UDFWrapper2(private val udfName: String) { * Registers the [func] with its [name] in [this]. */ @OptIn(ExperimentalStdlibApi::class) +@Deprecated("Use new UDF notation", ReplaceWith("this.register(name, func)"), DeprecationLevel.HIDDEN) inline fun UDFRegistration.register( name: String, noinline func: (T0, T1) -> R, @@ -150,6 +115,7 @@ inline fun UDFRegistration.register( * A wrapper for a UDF with 3 arguments. 
* @property udfName the name of the UDF */ +@Deprecated("Use new UDF notation", replaceWith = ReplaceWith("UserDefinedFunction3")) class UDFWrapper3(private val udfName: String) { /** * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. @@ -163,6 +129,7 @@ class UDFWrapper3(private val udfName: String) { * Registers the [func] with its [name] in [this]. */ @OptIn(ExperimentalStdlibApi::class) +@Deprecated("Use new UDF notation", ReplaceWith("this.register(name, func)"), DeprecationLevel.HIDDEN) inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2) -> R, @@ -178,6 +145,7 @@ inline fun UDFRegistration.regis * A wrapper for a UDF with 4 arguments. * @property udfName the name of the UDF */ +@Deprecated("Use new UDF notation", replaceWith = ReplaceWith("UserDefinedFunction4")) class UDFWrapper4(private val udfName: String) { /** * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. @@ -191,6 +159,7 @@ class UDFWrapper4(private val udfName: String) { * Registers the [func] with its [name] in [this]. */ @OptIn(ExperimentalStdlibApi::class) +@Deprecated("Use new UDF notation", ReplaceWith("this.register(name, func)"), DeprecationLevel.HIDDEN) inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3) -> R, @@ -207,6 +176,7 @@ inline fun UDFRegist * A wrapper for a UDF with 5 arguments. * @property udfName the name of the UDF */ +@Deprecated("Use new UDF notation", replaceWith = ReplaceWith("UserDefinedFunction5")) class UDFWrapper5(private val udfName: String) { /** * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. @@ -220,6 +190,7 @@ class UDFWrapper5(private val udfName: String) { * Registers the [func] with its [name] in [this]. 
*/ @OptIn(ExperimentalStdlibApi::class) +@Deprecated("Use new UDF notation", ReplaceWith("this.register(name, func)"), DeprecationLevel.HIDDEN) inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4) -> R, @@ -237,6 +208,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5) -> R, @@ -275,6 +248,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6) -> R, @@ -315,6 +290,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7) -> R, @@ -357,6 +334,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8) -> R, @@ -401,6 +380,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9) -> R, @@ -459,6 +440,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) -> R, @@ -520,6 +503,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) -> R, @@ -584,6 +569,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) -> R, @@ -651,6 +638,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) -> R, @@ -721,6 +710,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) -> R, @@ -794,6 +785,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) -> R, @@ -870,6 +863,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) -> R, @@ -949,6 +944,7 @@ inline fun UDFRegistration.register( name: 
String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) -> R, @@ -1031,6 +1028,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18) -> R, @@ -1116,6 +1115,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) -> R, @@ -1204,6 +1205,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) -> R, @@ -1295,6 +1298,7 @@ inline fun UDFRegistration.register( name: String, noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) -> R, diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedAggregateFunction.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedAggregateFunction.kt new file mode 100644 index 00000000..595fe0fa --- /dev/null +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedAggregateFunction.kt @@ -0,0 +1,287 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * =LICENSEEND= + */ +package org.jetbrains.kotlinx.spark.api + +import org.apache.spark.sql.Encoder +import org.apache.spark.sql.UDFRegistration +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.functions +import java.io.Serializable +import kotlin.reflect.typeOf + +/** Creates an [Aggregator] in functional manner. + * + * @param zero A zero value for this aggregation. Should satisfy the property that any b + zero = b. + * @param reduce Combine two values to produce a new value. For performance, the function may modify `b` and + * return it instead of constructing new object for b. + * @param merge Merge two intermediate values. + * @param finish Transform the output of the reduction. + * @param bufferEncoder Optional. Specifies the `Encoder` for the intermediate value type. + * @param outputEncoder Optional. Specifies the `Encoder` for the final output value type. + * */ +inline fun aggregatorOf( + noinline zero: () -> BUF, + noinline reduce: (b: BUF, a: IN) -> BUF, + noinline merge: (b1: BUF, b2: BUF) -> BUF, + noinline finish: (reduction: BUF) -> OUT, + bufferEncoder: Encoder = encoder(), + outputEncoder: Encoder = encoder(), +): Aggregator = Aggregator(zero, reduce, merge, finish, bufferEncoder, outputEncoder) + +class Aggregator( + zero: () -> BUF, + reduce: (b: BUF, a: IN) -> BUF, + merge: (b1: BUF, b2: BUF) -> BUF, + finish: (reduction: BUF) -> OUT, + private val bufferEncoder: Encoder, + private val outputEncoder: Encoder, +) : Aggregator(), Serializable { + + private val _zero: () -> BUF = zero + private val _reduce: (b: BUF, a: IN) -> BUF = reduce + private val _merge: (b1: BUF, b2: BUF) -> BUF = merge + private val _finish: (reduction: BUF) -> OUT = finish + + override fun zero(): BUF = _zero() + override fun reduce(b: BUF, a: IN): BUF = _reduce(b, a) + override fun merge(b1: BUF, b2: BUF): BUF = _merge(b1, b2) + override fun finish(reduction: BUF): OUT = _finish(reduction) + override fun bufferEncoder(): Encoder = 
bufferEncoder + override fun outputEncoder(): Encoder = outputEncoder +} + + +/** + * Obtains a [NamedUserDefinedFunction1] that wraps the given [agg] so that it may be used with Data Frames. + * @see functions.udaf + * + * @param agg the given [Aggregator] to convert into a UDAF. Can also be created using [aggregatorOf]. + * The UDAF is named after the simple class name of [agg]; an error is thrown if it has none. + * Use [udafUnnamed] if no name is wanted. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [NamedUserDefinedFunction1] that can be used as an aggregating expression + * + * @see udaf for the variant that takes an explicit name. + */ +inline fun > udaf( + agg: AGG, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = udaf( + name = agg::class.simpleName + ?: error("Could not obtain name from [agg], either supply a name or use [udafUnnamed()]"), + agg = agg, + nondeterministic = nondeterministic, +) + +/** + * Obtains a [NamedUserDefinedFunction1] that wraps the given [agg] so that it may be used with Data Frames. + * @see functions.udaf + * + * @param name The name for the UDAF. + * Use [udafUnnamed] if no name is wanted. + * @param agg the given [Aggregator] to convert into a UDAF. Can also be created using [aggregatorOf]. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [NamedUserDefinedFunction1] that can be used as an aggregating expression + * + * @see udafUnnamed for an unnamed variant. + */ +inline fun > udaf( + name: String, + agg: AGG, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = + udafUnnamed(agg = agg, nondeterministic = nondeterministic).withName(name) + +/** + * Obtains a [UserDefinedFunction1] that wraps the given [agg] so that it may be used with Data Frames. + * @see functions.udaf + * + * @param agg the given [Aggregator] to convert into a UDAF. 
Can also be created using [aggregatorOf]. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [UserDefinedFunction1] that can be used as an aggregating expression + * + * @see udaf for a named variant. + */ +inline fun > udafUnnamed( + agg: AGG, + nondeterministic: Boolean = false, +): UserDefinedFunction1 { + IN::class.checkForValidType("IN") + + return UserDefinedFunction1( + udf = functions.udaf(agg, encoder()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Obtains a [UserDefinedFunction1] created from an [Aggregator] created by the given arguments + * so that it may be used with Data Frames. + * @see functions.udaf + * @see aggregatorOf + * + * @param zero A zero value for this aggregation. Should satisfy the property that any b + zero = b. + * @param reduce Combine two values to produce a new value. For performance, the function may modify `b` and + * return it instead of constructing new object for b. + * @param merge Merge two intermediate values. + * @param finish Transform the output of the reduction. + * @param bufferEncoder Optional. Specifies the `Encoder` for the intermediate value type. + * @param outputEncoder Optional. Specifies the `Encoder` for the final output value type. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [UserDefinedFunction1] that can be used as an aggregating expression + * + * @see udaf for a named variant. 
+ */ +inline fun udaf( + noinline zero: () -> BUF, + noinline reduce: (b: BUF, a: IN) -> BUF, + noinline merge: (b1: BUF, b2: BUF) -> BUF, + noinline finish: (reduction: BUF) -> OUT, + bufferEncoder: Encoder = encoder(), + outputEncoder: Encoder = encoder(), + nondeterministic: Boolean = false, +): UserDefinedFunction1 = udafUnnamed( + aggregatorOf( + zero = zero, + reduce = reduce, + merge = merge, + finish = finish, + bufferEncoder = bufferEncoder, + outputEncoder = outputEncoder, + ), + nondeterministic = nondeterministic, +) + + +/** + * Obtains a [NamedUserDefinedFunction1] created from an [Aggregator] created by the given arguments + * so that it may be used with Data Frames. + * @see functions.udaf + * @see aggregatorOf + * + * @param name Name for the UDAF. + * @param zero A zero value for this aggregation. Should satisfy the property that any b + zero = b. + * @param reduce Combine two values to produce a new value. For performance, the function may modify `b` and + * return it instead of constructing new object for b. + * @param merge Merge two intermediate values. + * @param finish Transform the output of the reduction. + * @param bufferEncoder Optional. Specifies the `Encoder` for the intermediate value type. + * @param outputEncoder Optional. Specifies the `Encoder` for the final output value type. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [NamedUserDefinedFunction1] that can be used as an aggregating expression + * + * @see udafUnnamed for an unnamed variant. 
+ */ +inline fun udaf( + name: String, + noinline zero: () -> BUF, + noinline reduce: (b: BUF, a: IN) -> BUF, + noinline merge: (b1: BUF, b2: BUF) -> BUF, + noinline finish: (reduction: BUF) -> OUT, + bufferEncoder: Encoder = encoder(), + outputEncoder: Encoder = encoder(), + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = udaf( + name = name, + agg = aggregatorOf( + zero = zero, + reduce = reduce, + merge = merge, + finish = finish, + bufferEncoder = bufferEncoder, + outputEncoder = outputEncoder, + ), + nondeterministic = nondeterministic, +) + +/** + * Registers [agg] as a UDAF for SQL. Returns the UDAF as [NamedUserDefinedFunction]. + * Obtains a [NamedUserDefinedFunction1] that wraps the given [agg] so that it may be used with Data Frames. + * @see UDFRegistration.register + * @see functions.udaf + * + * @param agg the given [Aggregator] to convert into a UDAF. Can also be created using [aggregatorOf]. + * @param name Optional. Tries to obtain name from the class of [agg] if not supplied. + * Use [udafUnnamed] if no name is wanted. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [NamedUserDefinedFunction1] that can be used as an aggregating expression + */ +inline fun UDFRegistration.register( + name: String, + agg: Aggregator, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = register(udaf(name, agg, nondeterministic)) + +/** + * Registers [agg] as a UDAF for SQL. Returns the UDAF as [NamedUserDefinedFunction]. + * Obtains a [NamedUserDefinedFunction1] that wraps the given [agg] so that it may be used with Data Frames. + * @see UDFRegistration.register + * @see functions.udaf + * + * @param agg the given [Aggregator] to convert into a UDAF. Can also be created using [aggregatorOf]. + * @param name Optional. Tries to obtain name from the class of [agg] if not supplied. + * Use [udafUnnamed] if no name is wanted. + * @param nondeterministic Optional. 
If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [NamedUserDefinedFunction1] that can be used as an aggregating expression + */ +inline fun UDFRegistration.register( + agg: Aggregator, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = register(udaf(agg, nondeterministic)) + +/** + * Registers a UDAF for SQL based on the given arguments. Returns the UDAF as [NamedUserDefinedFunction]. + * The UDAF wraps an [Aggregator] created from the given arguments so that it may be used with Data Frames. + * @see UDFRegistration.register + * @see functions.udaf + * + * @param name Name for the UDAF. + * @param zero A zero value for this aggregation. Should satisfy the property that any b + zero = b. + * @param reduce Combine two values to produce a new value. For performance, the function may modify `b` and + * return it instead of constructing new object for b. + * @param merge Merge two intermediate values. + * @param finish Transform the output of the reduction. + * @param bufferEncoder Optional. Specifies the `Encoder` for the intermediate value type. + * @param outputEncoder Optional. Specifies the `Encoder` for the final output value type. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * + * @return a [NamedUserDefinedFunction1] that can be used as an aggregating expression. 
+ */ +inline fun UDFRegistration.register( + name: String, + noinline zero: () -> BUF, + noinline reduce: (b: BUF, a: IN) -> BUF, + noinline merge: (b1: BUF, b2: BUF) -> BUF, + noinline finish: (reduction: BUF) -> OUT, + bufferEncoder: Encoder = encoder(), + outputEncoder: Encoder = encoder(), + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = register( + udaf(name, zero, reduce, merge, finish, bufferEncoder, outputEncoder, nondeterministic) +) diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunction.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunction.kt new file mode 100644 index 00000000..4a773efe --- /dev/null +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunction.kt @@ -0,0 +1,162 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * =LICENSEEND= + */ +@file:Suppress("unused") + +package org.jetbrains.kotlinx.spark.api + +import org.apache.spark.sql.* +import org.apache.spark.sql.types.DataType +import scala.collection.Seq +import scala.collection.mutable.WrappedArray +import kotlin.reflect.KClass +import kotlin.reflect.KProperty +import kotlin.reflect.full.isSubclassOf +import kotlin.reflect.full.primaryConstructor +import org.apache.spark.sql.expressions.UserDefinedFunction as SparkUserDefinedFunction + +/** Unwraps [DataTypeWithClass]. */ +fun DataType.unWrap(): DataType = + when (this) { + is DataTypeWithClass -> DataType.fromJson(dt().json()) + else -> this + } + +/** + * Checks if [this] is of a valid type for a UDF, otherwise it throws a [TypeOfUDFParameterNotSupportedException] + */ +@PublishedApi +internal fun KClass<*>.checkForValidType(parameterName: String) { + if (this == String::class || isSubclassOf(WrappedArray::class) || isSubclassOf(Seq::class)) + return // Most of the time we need strings or WrappedArrays/Seqs + + if (isSubclassOf(Iterable::class) + || java.isArray + || isSubclassOf(Char::class) + || isSubclassOf(Map::class) + || isSubclassOf(Array::class) + || isSubclassOf(ByteArray::class) + || isSubclassOf(CharArray::class) + || isSubclassOf(ShortArray::class) + || isSubclassOf(IntArray::class) + || isSubclassOf(LongArray::class) + || isSubclassOf(FloatArray::class) + || isSubclassOf(DoubleArray::class) + || isSubclassOf(BooleanArray::class) + ) throw TypeOfUDFParameterNotSupportedException(this, parameterName) +} + +/** + * An exception thrown when the UDF is generated with illegal types for the parameters + */ +class TypeOfUDFParameterNotSupportedException(kClass: KClass<*>, parameterName: String) : IllegalArgumentException( + "Parameter $parameterName is subclass of ${kClass.qualifiedName}. If you need to process an array use ${Seq::class.qualifiedName}. You can convert any typed array/list-like column using [asSeq()]." 
+) + +@JvmName("arrayColumnAsSeq") +fun TypedColumn>.asSeq(): TypedColumn> = typed() +@JvmName("iterableColumnAsSeq") +fun > TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("byteArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("charArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("shortArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("intArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("longArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("floatArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("doubleArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() +@JvmName("booleanArrayColumnAsSeq") +fun TypedColumn.asSeq(): TypedColumn> = typed() + + +/** + * Registers a user-defined function (UDF) with name, for a UDF that's already defined using the Dataset + * API (i.e. of type [NamedUserDefinedFunction]). + * @see UDFRegistration.register + */ +inline fun > UDFRegistration.register( + namedUdf: NamedUdf, +): NamedUdf = namedUdf.copy(udf = register(namedUdf.name, namedUdf.udf)) + +inline fun > UDFRegistration.register( + name: String, + udf: UserDefinedFunction, +): NamedUdf = udf.withName(name).copy(udf = register(name, udf.udf)) + +/** + * Typed wrapper around [SparkUserDefinedFunction] with defined encoder. + * + * @param Return the return type of the udf + * @param NamedUdf a type reference to the named version of the [SparkUserDefinedFunction] implementing class + */ +sealed interface UserDefinedFunction { + val udf: SparkUserDefinedFunction + val encoder: Encoder + + /** Returns true when the UDF can return a nullable value. */ + val nullable: Boolean get() = udf.nullable() + + /** Returns true iff the UDF is deterministic, i.e. the UDF produces the same output given the same input. 
*/ + val deterministic: Boolean get() = udf.deterministic() + + /** Converts this [UserDefinedFunction] to a [NamedUserDefinedFunction]. */ + fun withName(name: String): NamedUdf + + /** + * Converts this [UserDefinedFunction] to a [NamedUserDefinedFunction]. + * @see withName + */ + operator fun getValue(thisRef: Any?, property: KProperty<*>): NamedUdf +} + +/** + * Typed and named wrapper around [SparkUserDefinedFunction] with defined encoder. + * + * @param Return the return type of the udf + * @param This a self reference to the named version of the [SparkUserDefinedFunction] implementing class. + * Unfortunately needed to allow functions to treat any [NamedTypedUserDefinedFunction] as a normal [TypedUserDefinedFunction]. + */ +sealed interface NamedUserDefinedFunction : UserDefinedFunction { + val name: String +} + +/** Copy method for all [NamedUserDefinedFunction] functions. */ +inline fun > T.copy( + name: String = this.name, + udf: SparkUserDefinedFunction = this.udf, + encoder: Encoder = this.encoder, +): T = T::class.primaryConstructor!!.run { + callBy( + parameters.associateWith { + when (it.name) { + NamedUserDefinedFunction<*, *>::name.name -> name + NamedUserDefinedFunction<*, *>::udf.name -> udf + NamedUserDefinedFunction<*, *>::encoder.name -> encoder + else -> error("Wrong arguments") + } + } + ) +} + diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunctionVararg.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunctionVararg.kt new file mode 100644 index 00000000..cf0c066e --- /dev/null +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunctionVararg.kt @@ -0,0 +1,1688 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * =LICENSEEND= + */ +package org.jetbrains.kotlinx.spark.api + + +import org.apache.spark.sql.* +import org.jetbrains.kotlinx.spark.extensions.VarargUnwrapper +import org.apache.spark.sql.api.java.* +import org.apache.spark.sql.internal.SQLConf +import kotlin.reflect.* +import org.apache.spark.sql.expressions.UserDefinedFunction as SparkUserDefinedFunction + +/** + * Instance of a UDF with vararg arguments of the same type. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunctionVararg + * @see udf + */ +open class UserDefinedFunctionVararg( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(vararg params: TypedColumn): TypedColumn = udf.apply(*params).`as`(encoder) as TypedColumn + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunctionVararg = NamedUserDefinedFunctionVararg( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. 
+ * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunctionVararg = + withName(property.name) +} + +/** + * Instance of a UDF with vararg arguments of the same type with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunctionVararg + * @see udf + */ +class NamedUserDefinedFunctionVararg( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunctionVararg(udf = udf.withName(name), encoder = encoder) + +@PublishedApi +internal inline fun withAllowUntypedScalaUDF(block: () -> R): R { + val sqlConf = SQLConf.get() + val confString = "spark.sql.legacy.allowUntypedScalaUDF" + val prev = sqlConf.getConfString(confString, "false") + sqlConf.setConfString(confString, "true") + return try { + block() + } finally { + sqlConf.setConfString(confString, prev) + } +} + + + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: ByteArray -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: ByteArray -> ... }` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. 
+ */ +@JvmName("udfVarargByte") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: ByteArray -> ... }` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargByte") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, +): UserDefinedFunctionVararg { + + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> ByteArray(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: ByteArray -> ... }` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. 
+ */ +@JvmName("registerVarargByte") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargByte") +inline fun udf( + varargFunc: KProperty0<(ByteArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargByte") +inline fun udf( + name: String, + varargFunc: KProperty0<(ByteArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +@JvmName("registerVarargByte") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(ByteArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargByte") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(ByteArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargByte") +inline fun udf( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. 
If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargByte") +inline fun udf( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargByte") +inline fun UDFRegistration.register( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an ByteArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargByte") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: ShortArray -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: ShortArray -> ... 
}` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargShort") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: ShortArray -> ... }` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargShort") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, +): UserDefinedFunctionVararg { + + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> ShortArray(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: ShortArray -> ... }` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("registerVarargShort") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargShort") +inline fun udf( + varargFunc: KProperty0<(ShortArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargShort") +inline fun udf( + name: String, + varargFunc: KProperty0<(ShortArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. 
+ * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargShort") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(ShortArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargShort") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(ShortArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargShort") +inline fun udf( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. 
+ * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargShort") +inline fun udf( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargShort") +inline fun UDFRegistration.register( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an ShortArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargShort") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: IntArray -> ... 
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: IntArray -> ... }` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargInt") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: IntArray -> ... }` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargInt") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, +): UserDefinedFunctionVararg { + + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> IntArray(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: IntArray -> ... }` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. 
If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("registerVarargInt") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargInt") +inline fun udf( + varargFunc: KProperty0<(IntArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargInt") +inline fun udf( + name: String, + varargFunc: KProperty0<(IntArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. 
+ * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargInt") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(IntArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargInt") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(IntArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargInt") +inline fun udf( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. 
+ * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargInt") +inline fun udf( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargInt") +inline fun UDFRegistration.register( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an IntArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargInt") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: LongArray -> ... 
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: LongArray -> ... }` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing an LongArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargLong") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: LongArray -> ... }` + * + * If you want to process a column containing an LongArray instead, use WrappedArray. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargLong") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, +): UserDefinedFunctionVararg { + + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> LongArray(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: LongArray -> ... }` + * + * If you want to process a column containing an LongArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. 
If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("registerVarargLong") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an LongArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargLong") +inline fun udf( + varargFunc: KProperty0<(LongArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an LongArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargLong") +inline fun udf( + name: String, + varargFunc: KProperty0<(LongArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an LongArray instead, use WrappedArray. 
+ * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargLong") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(LongArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing a LongArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargLong") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(LongArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing a LongArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargLong") +inline fun udf( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an LongArray instead, use WrappedArray.
+ * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargLong") +inline fun udf( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing a LongArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargLong") +inline fun UDFRegistration.register( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing a LongArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargLong") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: FloatArray -> ...
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: FloatArray -> ... }` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing an FloatArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargFloat") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: FloatArray -> ... }` + * + * If you want to process a column containing a FloatArray instead, use WrappedArray. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargFloat") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, +): UserDefinedFunctionVararg { + + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> FloatArray(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: FloatArray -> ... }` + * + * If you want to process a column containing an FloatArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional.
If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("registerVarargFloat") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing a FloatArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargFloat") +inline fun udf( + varargFunc: KProperty0<(FloatArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing a FloatArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargFloat") +inline fun udf( + name: String, + varargFunc: KProperty0<(FloatArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an FloatArray instead, use WrappedArray.
+ * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargFloat") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(FloatArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing a FloatArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargFloat") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(FloatArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing a FloatArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargFloat") +inline fun udf( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an FloatArray instead, use WrappedArray.
+ * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargFloat") +inline fun udf( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing a FloatArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargFloat") +inline fun UDFRegistration.register( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing a FloatArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargFloat") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: DoubleArray -> ...
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: DoubleArray -> ... }` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing an DoubleArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargDouble") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: DoubleArray -> ... }` + * + * If you want to process a column containing a DoubleArray instead, use WrappedArray. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargDouble") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, +): UserDefinedFunctionVararg { + + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> DoubleArray(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: DoubleArray -> ... }` + * + * If you want to process a column containing an DoubleArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional.
If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("registerVarargDouble") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing a DoubleArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargDouble") +inline fun udf( + varargFunc: KProperty0<(DoubleArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing a DoubleArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargDouble") +inline fun udf( + name: String, + varargFunc: KProperty0<(DoubleArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an DoubleArray instead, use WrappedArray.
+ * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargDouble") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(DoubleArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing a DoubleArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargDouble") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(DoubleArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing a DoubleArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargDouble") +inline fun udf( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference.
+ * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an DoubleArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargDouble") +inline fun udf( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing a DoubleArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargDouble") +inline fun UDFRegistration.register( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an DoubleArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +@JvmName("registerVarargDouble") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: BooleanArray -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: BooleanArray -> ... }` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing a BooleanArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargBoolean") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: BooleanArray -> ... }` + * + * If you want to process a column containing an BooleanArray instead, use WrappedArray. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda.
+ */ +@JvmName("udfVarargBoolean") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, +): UserDefinedFunctionVararg { + + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> BooleanArray(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: BooleanArray -> ... }` + * + * If you want to process a column containing a BooleanArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("registerVarargBoolean") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an BooleanArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +@JvmName("udfVarargBoolean") +inline fun udf( + varargFunc: KProperty0<(BooleanArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing a BooleanArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargBoolean") +inline fun udf( + name: String, + varargFunc: KProperty0<(BooleanArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing a BooleanArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargBoolean") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(BooleanArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an BooleanArray instead, use WrappedArray. + * + * @param name Optional. Name for the UDF.
+ * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargBoolean") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(BooleanArray) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing a BooleanArray instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargBoolean") +inline fun udf( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing a BooleanArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargBoolean") +inline fun udf( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an BooleanArray instead, use WrappedArray.
+ * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargBoolean") +inline fun UDFRegistration.register( + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing a BooleanArray instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargBoolean") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + + +/** + * Defines a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf("myUdf") { t1: Array -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: Array -> ... }` + * @see UserDefinedFunction.getValue + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda.
+ */ +@JvmName("udfVarargT") +inline fun udf( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, R>, +): NamedUserDefinedFunctionVararg = + udf(nondeterministic, varargFunc).withName(name) + +/** + * Defines a vararg UDF ([UserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf { t1: Array -> ... }` + * + * If you want to process a column containing an Array instead, use WrappedArray. The element type `T` is passed through `checkForValidType("T")` before the UDF is built. + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda. + */ +@JvmName("udfVarargT") +inline fun udf( + nondeterministic: Boolean = false, + varargFunc: UDF1, R>, +): UserDefinedFunctionVararg { + T::class.checkForValidType("T") + + return withAllowUntypedScalaUDF { + UserDefinedFunctionVararg( + udf = functions.udf(VarargUnwrapper(varargFunc) { i, init -> Array(i, init::call) }, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) + } +} +/** + * Defines and registers a named vararg UDF ([NamedUserDefinedFunctionVararg]) instance based on the (lambda) function [varargFunc]. + * For example: `val myUdf = udf.register("myUdf") { t1: Array -> ... }` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param varargFunc The function to convert to a UDF. Can be a lambda.
+ */ +@JvmName("registerVarargT") +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + varargFunc: UDF1, R>, +): NamedUserDefinedFunctionVararg = + register(udf(name, nondeterministic, varargFunc)) +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargT") +inline fun udf( + varargFunc: KProperty0<(Array) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargT") +inline fun udf( + name: String, + varargFunc: KProperty0<(Array) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc.get()) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +@JvmName("registerVarargT") +inline fun UDFRegistration.register( + varargFunc: KProperty0<(Array) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param name The name for this UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargT") +inline fun UDFRegistration.register( + name: String, + varargFunc: KProperty0<(Array) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adopting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargT") +inline fun udf( + varargFunc: KFunction1, R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(varargFunc.name, varargFunc, nondeterministic) + +/** + * Creates a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional.
If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("udfVarargT") +inline fun udf( + name: String, + varargFunc: KFunction1, R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = udf(name, nondeterministic, varargFunc) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param varargFunc function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +@JvmName("registerVarargT") +inline fun UDFRegistration.register( + varargFunc: KFunction1, R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(varargFunc, nondeterministic)) + +/** + * Creates and registers a vararg UDF ([NamedUserDefinedFunctionVararg]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * + * If you want to process a column containing an Array instead, use WrappedArray. + * + * @param name Optional. Name for the UDF. + * @param varargFunc function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +@JvmName("registerVarargT") +inline fun UDFRegistration.register( + name: String, + varargFunc: KFunction1, R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunctionVararg = register(udf(name, varargFunc, nondeterministic)) + diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunctions.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunctions.kt new file mode 100644 index 00000000..9b006e35 --- /dev/null +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UserDefinedFunctions.kt @@ -0,0 +1,5436 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * =LICENSEEND= + */ +package org.jetbrains.kotlinx.spark.api + +import org.apache.spark.sql.* +import org.apache.spark.sql.api.java.* +import kotlin.reflect.* +import org.apache.spark.sql.expressions.UserDefinedFunction as SparkUserDefinedFunction + +/** Applies this wrapper's Spark [udf] to the given [params] and returns the resulting [Column]. */ +private fun UserDefinedFunction<*, *>.invokeUntyped(vararg params: Column): Column = udf.apply(*params) + +/** + * Instance of a UDF with 0 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register].
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction0 + * @see udf + */ +open class UserDefinedFunction0( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(): TypedColumn = invokeUntyped(*arrayOf()).`as`(encoder) as TypedColumn + + + + /** + * Returns an expression that invokes the UDF in untyped manner, without arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(): Column = invokeUntyped(*arrayOf()) + + /** Returns a named variant of this UDF, built from its [udf] and [encoder] with the given [name]. */ + override fun withName(name: String): NamedUserDefinedFunction0 = NamedUserDefinedFunction0( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns a named variant of this UDF, named after the delegated [property]. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction0 = + withName(property.name) +} + +/** + * Instance of a UDF with 0 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction0 + * @see udf + */ +class NamedUserDefinedFunction0( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction0(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction0]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional.
If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<() -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction0]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<() -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction0]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<() -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction0]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<() -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction0]) from a function reference adapting its name by reflection.
+ * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction0, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction0]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction0, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction0]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction0, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction0]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction0, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction0 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types.
*/ +fun interface UDF0 : org.apache.spark.sql.api.java.UDF0 { override fun call(): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction0]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional, defaults to `false`. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF0, +): NamedUserDefinedFunction0 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction0]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { -> ... }` + * + * @param nondeterministic Optional, defaults to `false`. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF0, +): UserDefinedFunction0 { + + + return UserDefinedFunction0( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction0]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF0, +): NamedUserDefinedFunction0 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 1 argument. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction1 + * @see udf + * @see udaf + */ +open class UserDefinedFunction1( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column): Column = invokeUntyped(*arrayOf(param0)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column): Column = invokeUntyped(*arrayOf(param0)) + + /** Returns a named variant of this UDF, built from its [udf] and [encoder] with the given [name]. */ + override fun withName(name: String): NamedUserDefinedFunction1 = NamedUserDefinedFunction1( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns a named variant of this UDF, named after the delegated [property]. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction1 = + withName(property.name) +} + +/** + * Instance of a UDF with 1 argument with name.
+ * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction1 + * @see udf + * @see udaf + */ +class NamedUserDefinedFunction1( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction1(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction1]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction1]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction1]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction1]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction1]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction1]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction1]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional.
If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction1]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction1, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction1 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF1 : org.apache.spark.sql.api.java.UDF1 { override fun call(t1: T1): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction1]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF1, +): NamedUserDefinedFunction1 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction1]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF1, +): UserDefinedFunction1 { + T1::class.checkForValidType("T1") + + return UserDefinedFunction1( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction1]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional, defaults to `false`. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF1, +): NamedUserDefinedFunction1 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 2 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction2 + * @see udf + */ +open class UserDefinedFunction2( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column): Column = invokeUntyped(*arrayOf(param0, param1)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column): Column = invokeUntyped(*arrayOf(param0, param1)) + + /** Returns a named variant of this UDF, built from its [udf] and [encoder] with the given [name]. */ + override fun withName(name: String): NamedUserDefinedFunction2 = NamedUserDefinedFunction2( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns a named variant of this UDF, named after the delegated [property]. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction2 = + withName(property.name) +} + +/** + * Instance of a UDF with 2 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction2 + * @see udf + */ +class NamedUserDefinedFunction2( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction2(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction2]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction2]) from a function reference.
+ * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction2]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction2]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction2]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + func: KFunction2, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction2]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction2, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction2]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction2, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction2]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction2, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction2 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF2 : org.apache.spark.sql.api.java.UDF2 { override fun call(t1: T1, t2: T2): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction2]) instance based on the (lambda) function [func].
+ * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF2, +): NamedUserDefinedFunction2 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction2]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2 -> ... }` + * + * @param nondeterministic Optional, defaults to `false`. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF2, +): UserDefinedFunction2 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + + return UserDefinedFunction2( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction2]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional, defaults to `false`. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF2, +): NamedUserDefinedFunction2 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 3 arguments.
+ * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction3 + * @see udf + */ +open class UserDefinedFunction3( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2)) + + /** Returns a named variant of this UDF, built from its [udf] and [encoder] with the given [name]. */ + override fun withName(name: String): NamedUserDefinedFunction3 = NamedUserDefinedFunction3( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns a named variant of this UDF, named after the delegated [property]. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction3 = + withName(property.name) +} + +/** + * Instance of a UDF with 3 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call.
+ * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction3 + * @see udf + */ +class NamedUserDefinedFunction3( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction3(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction3]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction3]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction3]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction3]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction3]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference; its name is used as the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction3, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction3]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction3, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction3]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional.
If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction3, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction3]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction3, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction3 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF3 : org.apache.spark.sql.api.java.UDF3 { override fun call(t1: T1, t2: T2, t3: T3): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction3]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF3, +): NamedUserDefinedFunction3 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction3]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. 
Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF3, +): UserDefinedFunction3 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + + return UserDefinedFunction3( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction3]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF3, +): NamedUserDefinedFunction3 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 4 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction4 + * @see udf + */ +open class UserDefinedFunction4( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. 
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3)) + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunction4 = NamedUserDefinedFunction4( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction4 = + withName(property.name) +} + +/** + * Instance of a UDF with 4 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction4 + * @see udf + */ +class NamedUserDefinedFunction4( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction4(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction4]) from a function reference adapting its name by reflection. 
+ * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction4]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction4]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction4]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction4]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction4, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction4]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction4, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction4]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction4, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction4]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. 
If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction4, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction4 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF4 : org.apache.spark.sql.api.java.UDF4 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction4]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF4, +): NamedUserDefinedFunction4 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction4]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. 
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF4, +): UserDefinedFunction4 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + + return UserDefinedFunction4( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction4]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF4, +): NamedUserDefinedFunction4 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 5 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction5 + * @see udf + */ +open class UserDefinedFunction5( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. 
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4)) + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunction5 = NamedUserDefinedFunction5( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction5 = + withName(property.name) +} + +/** + * Instance of a UDF with 5 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. 
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction5 + * @see udf + */ +class NamedUserDefinedFunction5( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction5(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction5]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction5]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction5]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction5]) from a function reference. 
+ * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction5]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction5, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction5]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction5, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction5]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction5, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction5]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction5, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction5 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF5 : org.apache.spark.sql.api.java.UDF5 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction5]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF5, +): NamedUserDefinedFunction5 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction5]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. 
Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF5, +): UserDefinedFunction5 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + + return UserDefinedFunction5( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction5]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF5, +): NamedUserDefinedFunction5 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 6 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction6 + * @see udf + */ +open class UserDefinedFunction6( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. 
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5)) + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunction6 = NamedUserDefinedFunction6( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction6 = + withName(property.name) +} + +/** + * Instance of a UDF with 6 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. 
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction6 + * @see udf + */ +class NamedUserDefinedFunction6( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction6(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction6]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction6]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction6]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction6]) from a function reference. 
+ * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction6]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction6, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction6]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction6, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction6]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction6, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction6]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction6, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction6 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF6 : org.apache.spark.sql.api.java.UDF6 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction6]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF6, +): NamedUserDefinedFunction6 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction6]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF6, +): UserDefinedFunction6 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + + return UserDefinedFunction6( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction6]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF6, +): NamedUserDefinedFunction6 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 7 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction7 + * @see udf + */ +open class UserDefinedFunction7( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. 
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6)) + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunction7 = NamedUserDefinedFunction7( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction7 = + withName(property.name) +} + +/** + * Instance of a UDF with 7 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. 
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction7 + * @see udf + */ +class NamedUserDefinedFunction7( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction7(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction7]) from a reference to a property holding a function, using the property's name as the UDF name. + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction7]) from the function held by a referenced property (read with `get()`). + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction7]) from a reference to a property holding a function, using the property's name as the UDF name. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction7]) from a function reference.
+ * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction7]) from a function reference, using the function's name (`func.name`) as the UDF name. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction7, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction7]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction7, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction7]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction7, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction7]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction7, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction7 = register(udf(name, func, nondeterministic)) + + +/** Kotlin `fun interface` wrapper around Spark's Java `UDF7` that re-declares `call` with Kotlin types to ensure nullability in types. */ +fun interface UDF7 : org.apache.spark.sql.api.java.UDF7 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction7]) instance based on the (lambda) function [func]; built as `udf(nondeterministic, func).withName(name)`. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF7, +): NamedUserDefinedFunction7 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction7]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7 -> ... }` + * + * @param nondeterministic Optional.
If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF7, +): UserDefinedFunction7 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + + return UserDefinedFunction7( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction7]) instance based on the (lambda) function [func]; shorthand for `register(udf(name, nondeterministic, func))`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF7, +): NamedUserDefinedFunction7 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 8 arguments, pairing the Spark [udf] with the [encoder] used for typed results. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction8 + * @see udf + */ +open class UserDefinedFunction8( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7)) + + /** + * Non-operator form of the untyped [invoke]: returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7)) + + /** Returns a [NamedUserDefinedFunction8] carrying [name] and this UDF's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction8 = NamedUserDefinedFunction8( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate accessor: returns the named variant of this UDF, named after the delegated property. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction8 = + withName(property.name) +} + +/** + * Instance of a UDF with 8 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register].
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction8 + * @see udf + */ +class NamedUserDefinedFunction8( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction8(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction8]) from a reference to a property holding a function, using the property's name as the UDF name. + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction8]) from the function held by a referenced property (read with `get()`). + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction8]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction8]) from a reference to a property holding a function. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction8]) from a function reference, using the function's name (`func.name`) as the UDF name. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction8, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction8]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction8, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction8]) from a function reference adapting its name by reflection.
+ * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction8, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction8]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction8, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction8 = register(udf(name, func, nondeterministic)) + + +/** Kotlin `fun interface` wrapper around Spark's Java `UDF8` that re-declares `call` with Kotlin types to ensure nullability in types. */ +fun interface UDF8 : org.apache.spark.sql.api.java.UDF8 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction8]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF8, +): NamedUserDefinedFunction8 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction8]) instance based on the (lambda) function [func]. Each argument type is first checked with `checkForValidType`; the Spark UDF is made non-nullable unless the inspected type is marked nullable. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF8, +): UserDefinedFunction8 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + + return UserDefinedFunction8( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction8]) instance based on the (lambda) function [func]; shorthand for `register(udf(name, nondeterministic, func))`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF8, +): NamedUserDefinedFunction8 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 9 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call.
+ * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction9 + * @see udf + */ +open class UserDefinedFunction9( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call; the untyped result column is converted with [encoder]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8)) + + /** + * Non-operator form of the untyped [invoke]: returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8)) + + /** Returns named variant of this UDF.
*/ + override fun withName(name: String): NamedUserDefinedFunction9 = NamedUserDefinedFunction9( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate accessor: returns the named variant of this UDF, named after the delegated property. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction9 = + withName(property.name) +} + +/** + * Instance of a UDF with 9 arguments with name; the wrapped Spark UDF is renamed via `withName(name)`. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction9 + * @see udf + */ +class NamedUserDefinedFunction9( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction9(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction9]) from a reference to a property holding a function, using the property's name as the UDF name. + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction9]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction9]) from a reference to a property holding a function, using the property's name as the UDF name. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction9]) from a reference to a property holding a function. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction9]) from a function reference, using the function's name (`func.name`) as the UDF name. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction9, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction9]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional.
Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction9, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction9]) from a function reference, using the function's name (`func.name`) as the UDF name. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction9, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction9]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction9, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction9 = register(udf(name, func, nondeterministic)) + + +/** Kotlin `fun interface` wrapper around Spark's Java `UDF9` that re-declares `call` with Kotlin types to ensure nullability in types. */ +fun interface UDF9 : org.apache.spark.sql.api.java.UDF9 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction9]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9 -> ...
}` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF9, +): NamedUserDefinedFunction9 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction9]) instance based on the (lambda) function [func]. Each argument type is first checked with `checkForValidType`; the Spark UDF is made non-nullable unless the inspected type is marked nullable. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF9, +): UserDefinedFunction9 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + + return UserDefinedFunction9( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction9]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF9, +): NamedUserDefinedFunction9 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 10 arguments, pairing the Spark [udf] with the [encoder] used for typed results. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction10 + * @see udf + */ +open class UserDefinedFunction10( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call; the untyped result column is converted with [encoder]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9)) + + /** Returns a [NamedUserDefinedFunction10] carrying [name] and this UDF's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction10 = NamedUserDefinedFunction10( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate accessor: returns the named variant of this UDF, named after the delegated property. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction10 = + withName(property.name) +} + +/** + * Instance of a UDF with 10 arguments with name; the wrapped Spark UDF is renamed via `withName(name)`. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction10 + * @see udf + */ +class NamedUserDefinedFunction10( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction10(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction10]) from a reference to a property holding a function, using the property's name as the UDF name. + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction10]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional.
Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction10]) from a reference to a property holding a function, using the property's name as the UDF name. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction10]) from a reference to a property holding a function. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction10]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + func: KFunction10, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction10]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction10, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction10]) from a function reference, using the function's name (`func.name`) as the UDF name. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction10, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction10]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction10, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction10 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types.
*/ +fun interface UDF10 : org.apache.spark.sql.api.java.UDF10 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction10]) instance based on the (lambda) function [func]: the unnamed UDF is built first and then named via `withName(name)`. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF10, +): NamedUserDefinedFunction10 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction10]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF10, +): UserDefinedFunction10 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + + return UserDefinedFunction10( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction10]) instance based on the (lambda) function [func]; the UDF is built with [udf] and then passed to `register`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF10, +): NamedUserDefinedFunction10 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 11 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction11 + * @see udf + */ +open class UserDefinedFunction11( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call; the untyped result is turned back into a typed column using [encoder].
+ * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments; named (non-operator) counterpart of the [Column] `invoke` overload. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10)) + + /** Returns a named variant of this UDF built from this instance's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction11 = NamedUserDefinedFunction11( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns a named variant of this UDF, named after the delegated [property]. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction11 = + withName(property.name) +} + +/** + * Instance of a UDF with 11 arguments with name.
+ * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction11 + * @see udf + */ +class NamedUserDefinedFunction11( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction11(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction11]) from a reference to a property holding the function, named after that property (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction11]) from a reference to a property holding the function; the function is read with `func.get()`. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction11]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction11]) from a reference to a property holding the function. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction11]) from a function reference, named after the function (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction11, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction11]) from a function reference, named with the given [name]. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction11, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction11]) from a function reference adapting its name by reflection.
+ * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction11, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction11]) from a function reference, named with the given [name]. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction11, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction11 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around Spark's Java `UDF11` interface that redeclares `call` to ensure nullability in types. */ +fun interface UDF11 : org.apache.spark.sql.api.java.UDF11 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction11]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF11, +): NamedUserDefinedFunction11 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction11]) instance based on the (lambda) function [func]. Every argument type is first checked with `checkForValidType`, and the UDF is marked non-nullable unless the return type is marked nullable. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF11, +): UserDefinedFunction11 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + + return UserDefinedFunction11( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction11]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF11, +): NamedUserDefinedFunction11 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 12 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction12 + * @see udf + */ +open class UserDefinedFunction12( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call; the untyped result is turned back into a typed column using [encoder]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments; named (non-operator) counterpart of the [Column] `invoke` overload.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11)) + + /** Returns a named variant of this UDF built from this instance's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction12 = NamedUserDefinedFunction12( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns a named variant of this UDF, named after the delegated [property]. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction12 = + withName(property.name) +} + +/** + * Instance of a UDF with 12 arguments with name; the wrapped Spark UDF is given [name] via `withName`. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction12 + * @see udf + */ +class NamedUserDefinedFunction12( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction12(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction12]) from a reference to a property holding the function, named after that property (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction12]) from a function reference.
+ * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction12]) from a reference to a property holding the function, adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction12]) from a reference to a property holding the function. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction12]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + func: KFunction12, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction12]) from a function reference, named with the given [name]. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction12, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction12]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction12, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction12]) from a function reference, named with the given [name]. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction12, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction12 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types.
*/ +fun interface UDF12 : org.apache.spark.sql.api.java.UDF12 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction12]) instance based on the (lambda) function [func]: the unnamed UDF is built first and then named via `withName(name)`. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF12, +): NamedUserDefinedFunction12 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction12]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF12, +): UserDefinedFunction12 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + + return UserDefinedFunction12( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction12]) instance based on the (lambda) function [func]; the UDF is built with [udf] and then passed to `register`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF12, +): NamedUserDefinedFunction12 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 13 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register].
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction13 + * @see udf + */ +open class UserDefinedFunction13( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call; the untyped result is turned back into a typed column using [encoder]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments; named (non-operator) counterpart of the [Column] `invoke` overload.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12)) + + /** Returns a named variant of this UDF built from this instance's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction13 = NamedUserDefinedFunction13( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns a named variant of this UDF, named after the delegated [property]. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction13 = + withName(property.name) +} + +/** + * Instance of a UDF with 13 arguments with name; the wrapped Spark UDF is given [name] via `withName`. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction13 + * @see udf + */ +class NamedUserDefinedFunction13( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction13(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction13]) from a reference to a property holding the function, named after that property (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction13]) from a function reference.
+ * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction13]) from a reference to a property holding the function, adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction13]) from a reference to a property holding the function. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property whose value is the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction13]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + func: KFunction13, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction13]) from a function reference, named with the given [name]. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction13, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction13]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction13, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction13]) from a function reference, named with the given [name]. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction13, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction13 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types.
*/ +fun interface UDF13 : org.apache.spark.sql.api.java.UDF13 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction13]) instance based on the (lambda) function [func]: the unnamed UDF is built first and then named via `withName(name)`. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF13, +): NamedUserDefinedFunction13 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction13]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF13, +): UserDefinedFunction13 { /* Runs checkForValidType on each of T1..T13 before the Spark UDF is created. */ + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + + return UserDefinedFunction13( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, /* asNonNullable() is applied unless typeOf() reports isMarkedNullable. NOTE(review): the type argument of typeOf()/encoder() is not visible in this hunk - presumably the return type R; confirm. */ + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction13]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + * @return the result of `register(udf(name, nondeterministic, func))` + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF13, +): NamedUserDefinedFunction13 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 14 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register].
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction14 + * @see udf + */ +open class UserDefinedFunction14( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * The column returned by `invokeUntyped` is converted with `as(encoder)` and cast to [TypedColumn]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * Forwards its arguments to the vararg `invokeUntyped` through the spread operator. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13)) + + /** Returns a [NamedUserDefinedFunction14] called [name] that reuses this instance's `udf` and `encoder`. */ + override fun withName(name: String): NamedUserDefinedFunction14 = NamedUserDefinedFunction14( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate operator: returns the named variant of this UDF, using `property.name` as the name. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction14 = + withName(property.name) +} + +/** + * Instance of a UDF with 14 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * The Spark UDF handed to [UserDefinedFunction14] is `udf.withName(name)`. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction14 + * @see udf + */ +class NamedUserDefinedFunction14( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction14(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction14]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction14]) from a reference to a property holding a function. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type (`KProperty0`); its value is read with `func.get()` + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the named UDF produced by `udf(name, nondeterministic, func.get())` + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction14]) from a reference to a property holding a function, adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property of function type (`KProperty0`) + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the result of `register(udf(func, nondeterministic))` + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction14]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction14]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the UDF named after `func.name` + * @see udf + */ +inline fun udf( + func: KFunction14, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction14]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the named UDF produced by `udf(name, nondeterministic, func)` + * @see udf + */ +inline fun udf( + name: String, + func: KFunction14, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction14]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the result of `register(udf(func, nondeterministic))` + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction14, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction14]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF.
+ * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction14, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction14 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around Spark's Java UDF14 interface to ensure nullability in types. Declared as a `fun interface` whose only abstract member is `call`. */ +fun interface UDF14 : org.apache.spark.sql.api.java.UDF14 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction14]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + * @return the UDF built by `udf(nondeterministic, func)`, renamed with `withName(name)` + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF14, +): NamedUserDefinedFunction14 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction14]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF14, +): UserDefinedFunction14 { /* Runs checkForValidType on each of T1..T14 before the Spark UDF is created. */ + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + + return UserDefinedFunction14( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, /* asNonNullable() is applied unless typeOf() reports isMarkedNullable. NOTE(review): the type argument of typeOf()/encoder() is not visible in this hunk - presumably the return type R; confirm. */ + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction14]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + * @return the result of `register(udf(name, nondeterministic, func))` + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF14, +): NamedUserDefinedFunction14 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 15 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register].
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction15 + * @see udf + */ +open class UserDefinedFunction15( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * The column returned by `invokeUntyped` is converted with `as(encoder)` and cast to [TypedColumn]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * Forwards its arguments to the vararg `invokeUntyped` through the spread operator. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14)) + + /** Returns a [NamedUserDefinedFunction15] called [name] that reuses this instance's `udf` and `encoder`. */ + override fun withName(name: String): NamedUserDefinedFunction15 = NamedUserDefinedFunction15( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate operator: returns the named variant of this UDF, using `property.name` as the name. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction15 = + withName(property.name) +} + +/** + * Instance of a UDF with 15 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * The Spark UDF handed to [UserDefinedFunction15] is `udf.withName(name)`. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction15 + * @see udf + */ +class NamedUserDefinedFunction15( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction15(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction15]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction15]) from a reference to a property holding a function. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type (`KProperty0`); its value is read with `func.get()` + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the named UDF produced by `udf(name, nondeterministic, func.get())` + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction15]) from a reference to a property holding a function, adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property of function type (`KProperty0`) + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the result of `register(udf(func, nondeterministic))` + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction15]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction15]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the UDF named after `func.name` + * @see udf + */ +inline fun udf( + func: KFunction15, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction15]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the named UDF produced by `udf(name, nondeterministic, func)` + * @see udf + */ +inline fun udf( + name: String, + func: KFunction15, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction15]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the result of `register(udf(func, nondeterministic))` + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction15, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction15]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF.
+ * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction15, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction15 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around Spark's Java UDF15 interface to ensure nullability in types. Declared as a `fun interface` whose only abstract member is `call`. */ +fun interface UDF15 : org.apache.spark.sql.api.java.UDF15 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction15]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + * @return the UDF built by `udf(nondeterministic, func)`, renamed with `withName(name)` + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF15, +): NamedUserDefinedFunction15 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction15]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF15, +): UserDefinedFunction15 { /* Runs checkForValidType on each of T1..T15 before the Spark UDF is created. */ + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + + return UserDefinedFunction15( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, /* asNonNullable() is applied unless typeOf() reports isMarkedNullable. NOTE(review): the type argument of typeOf()/encoder() is not visible in this hunk - presumably the return type R; confirm. */ + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction15]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + * @return the result of `register(udf(name, nondeterministic, func))` + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF15, +): NamedUserDefinedFunction15 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 16 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register].
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction16 + * @see udf + */ +open class UserDefinedFunction16( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * The column returned by `invokeUntyped` is converted with `as(encoder)` and cast to [TypedColumn]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn, param15: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * Forwards its arguments to the vararg `invokeUntyped` through the spread operator. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15)) + + /** Returns a [NamedUserDefinedFunction16] called [name] that reuses this instance's `udf` and `encoder`. */ + override fun withName(name: String): NamedUserDefinedFunction16 = NamedUserDefinedFunction16( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate operator: returns the named variant of this UDF, using `property.name` as the name. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction16 = + withName(property.name) +} + +/** + * Instance of a UDF with 16 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * The Spark UDF handed to [UserDefinedFunction16] is `udf.withName(name)`. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction16 + * @see udf + */ +class NamedUserDefinedFunction16( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction16(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction16]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction16]) from a reference to a property holding a function. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property of function type (`KProperty0`); its value is read with `func.get()` + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the named UDF produced by `udf(name, nondeterministic, func.get())` + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction16]) from a reference to a property holding a function, adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property of function type (`KProperty0`) + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the result of `register(udf(func, nondeterministic))` + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction16]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction16]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the UDF named after `func.name` + * @see udf + */ +inline fun udf( + func: KFunction16, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction16]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the named UDF produced by `udf(name, nondeterministic, func)` + * @see udf + */ +inline fun udf( + name: String, + func: KFunction16, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction16]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @return the result of `register(udf(func, nondeterministic))` + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction16, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction16]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF.
+ * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction16, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction16 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around Spark's Java UDF16 interface to ensure nullability in types. Declared as a `fun interface` whose only abstract member is `call`. */ +fun interface UDF16 : org.apache.spark.sql.api.java.UDF16 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction16]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + * @return the UDF built by `udf(nondeterministic, func)`, renamed with `withName(name)` + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF16, +): NamedUserDefinedFunction16 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction16]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF16, +): UserDefinedFunction16 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + T16::class.checkForValidType("T16") + + return UserDefinedFunction16( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction16]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF16, +): NamedUserDefinedFunction16 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 17 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. 
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction17 + * @see udf + */ +open class UserDefinedFunction17( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn, param15: TypedColumn, param16: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. 
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16)) + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunction17 = NamedUserDefinedFunction17( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction17 = + withName(property.name) +} + +/** + * Instance of a UDF with 17 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction17 + * @see udf + */ +class NamedUserDefinedFunction17( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction17(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction17]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction17]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction17]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction17]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction17]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction17, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction17]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction17, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction17]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction17, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction17]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. 
+ * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction17, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction17 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF17 : org.apache.spark.sql.api.java.UDF17 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction17]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF17, +): NamedUserDefinedFunction17 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction17]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17 -> ... }` + * + * @param nondeterministic Optional. 
If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF17, +): UserDefinedFunction17 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + T16::class.checkForValidType("T16") + T17::class.checkForValidType("T17") + + return UserDefinedFunction17( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction17]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF17, +): NamedUserDefinedFunction17 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 18 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. 
+ * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction18 + * @see udf + */ +open class UserDefinedFunction18( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn, param15: TypedColumn, param16: TypedColumn, param17: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. 
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17)) + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunction18 = NamedUserDefinedFunction18( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction18 = + withName(property.name) +} + +/** + * Instance of a UDF with 18 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction18 + * @see udf + */ +class NamedUserDefinedFunction18( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction18(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction18]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction18]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction18]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction18]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction18]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction18, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction18]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KFunction18, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction18]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction18, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction18]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. 
+ * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction18, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction18 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF18 : org.apache.spark.sql.api.java.UDF18 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction18]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18 -> ... }` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF18, +): NamedUserDefinedFunction18 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction18]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18 -> ... }` + * + * @param nondeterministic Optional. 
If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF18, +): UserDefinedFunction18 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + T16::class.checkForValidType("T16") + T17::class.checkForValidType("T17") + T18::class.checkForValidType("T18") + + return UserDefinedFunction18( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction18]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF18, +): NamedUserDefinedFunction18 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 19 arguments. 
+ * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction19 + * @see udf + */ +open class UserDefinedFunction19( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn, param15: TypedColumn, param16: TypedColumn, param17: TypedColumn, param18: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. 
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18)) + + /** Returns named variant of this UDF. */ + override fun withName(name: String): NamedUserDefinedFunction19 = NamedUserDefinedFunction19( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Returns named variant of this UDF. + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction19 = + withName(property.name) +} + +/** + * Instance of a UDF with 19 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. 
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction19 + * @see udf + */ +class NamedUserDefinedFunction19( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction19(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction19]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction19]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction19]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction19]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction19]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction19, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction19]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun udf( + name: String, + func: KFunction19, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction19]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction19, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction19]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction19, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction19 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around UDF interface to ensure nullability in types. */ +fun interface UDF19 : org.apache.spark.sql.api.java.UDF19 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction19]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19 -> ... 
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF19, +): NamedUserDefinedFunction19 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction19]) instance based on the (lambda) function [func]. The class of each argument type parameter (`T1`..`T19`) is first passed to `checkForValidType`; the created UDF is marked non-nullable (`asNonNullable()`) unless the type queried with `isMarkedNullable` is marked nullable. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19 -> ... }` + * + * @param nondeterministic Optional (defaults to `false`). If true, the created UDF is converted with `asNondeterministic()`. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF19, +): UserDefinedFunction19 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + T16::class.checkForValidType("T16") + T17::class.checkForValidType("T17") + T18::class.checkForValidType("T18") + T19::class.checkForValidType("T19") + + return UserDefinedFunction19( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction19]) instance based on the (lambda) function [func], by building it with `udf(name, nondeterministic, func)` and passing the result to `register`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional (defaults to `false`). If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF19, +): NamedUserDefinedFunction19 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 20 arguments. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call.
+ * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction20 + * @see udf + */ +open class UserDefinedFunction20( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. The untyped result is given this UDF's [encoder] and cast to [TypedColumn]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn, param15: TypedColumn, param16: TypedColumn, param17: TypedColumn, param18: TypedColumn, param19: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column, param19: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. Named (non-operator) counterpart of the untyped [invoke]; both forward to the vararg `invokeUntyped`. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column, param19: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19)) + + /** Returns a [NamedUserDefinedFunction20] with the given [name], built from this UDF's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction20 = NamedUserDefinedFunction20( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate accessor: returns the variant of this UDF named after the delegated property (`property.name`). + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction20 = + withName(property.name) +} + +/** + * Instance of a UDF with 20 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register].
+ * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction20 + * @see udf + */ +class NamedUserDefinedFunction20( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction20(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction20]) from a property reference (a [KProperty0] holding the function), naming it after the property (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction20]) from a property reference (a [KProperty0] holding the function); the function value is obtained with `func.get()`. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction20]) from a property reference (a [KProperty0] holding the function) adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction20]) from a property reference (a [KProperty0] holding the function). + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction20]) from a function reference, naming it after the referenced function (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction20, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction20]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + name: String, + func: KFunction20, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction20]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction20, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction20]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction20, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction20 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around Spark's Java [org.apache.spark.sql.api.java.UDF20] interface to ensure nullability in types. */ +fun interface UDF20 : org.apache.spark.sql.api.java.UDF20 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction20]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20 -> ...
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF20, +): NamedUserDefinedFunction20 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction20]) instance based on the (lambda) function [func]. The class of each argument type parameter (`T1`..`T20`) is first passed to `checkForValidType`; the created UDF is marked non-nullable (`asNonNullable()`) unless the type queried with `isMarkedNullable` is marked nullable. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20 -> ... }` + * + * @param nondeterministic Optional (defaults to `false`). If true, the created UDF is converted with `asNondeterministic()`. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF20, +): UserDefinedFunction20 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + T16::class.checkForValidType("T16") + T17::class.checkForValidType("T17") + T18::class.checkForValidType("T18") + T19::class.checkForValidType("T19") + T20::class.checkForValidType("T20") + + return UserDefinedFunction20( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction20]) instance based on the (lambda) function [func], by building it with `udf(name, nondeterministic, func)` and passing the result to `register`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional (defaults to `false`). If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF20, +): NamedUserDefinedFunction20 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 21 arguments.
+ * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction21 + * @see udf + */ +open class UserDefinedFunction21( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. The untyped result is given this UDF's [encoder] and cast to [TypedColumn]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn, param15: TypedColumn, param16: TypedColumn, param17: TypedColumn, param18: TypedColumn, param19: TypedColumn, param20: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column, param19: Column, param20: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. Named (non-operator) counterpart of the untyped [invoke]; both forward to the vararg `invokeUntyped`. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column, param19: Column, param20: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)) + + /** Returns a [NamedUserDefinedFunction21] with the given [name], built from this UDF's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction21 = NamedUserDefinedFunction21( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate accessor: returns the variant of this UDF named after the delegated property (`property.name`). + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction21 = + withName(property.name) +} + +/** + * Instance of a UDF with 21 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call.
+ * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction21 + * @see udf + */ +class NamedUserDefinedFunction21( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction21(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction21]) from a property reference (a [KProperty0] holding the function), naming it after the property (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction21]) from a property reference (a [KProperty0] holding the function); the function value is obtained with `func.get()`. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction21]) from a property reference (a [KProperty0] holding the function) adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction21]) from a property reference (a [KProperty0] holding the function). + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property holding the function + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction21]) from a function reference, naming it after the referenced function (`func.name`). + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction21, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction21]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic.
+ * @see udf + */ +inline fun udf( + name: String, + func: KFunction21, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction21]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction21, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction21]) from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction21, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction21 = register(udf(name, func, nondeterministic)) + + +/** Kotlin wrapper around Spark's Java [org.apache.spark.sql.api.java.UDF21] interface to ensure nullability in types. */ +fun interface UDF21 : org.apache.spark.sql.api.java.UDF21 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction21]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21 -> ...
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF21, +): NamedUserDefinedFunction21 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction21]) instance based on the (lambda) function [func]. The class of each argument type parameter (`T1`..`T21`) is first passed to `checkForValidType`; the created UDF is marked non-nullable (`asNonNullable()`) unless the type queried with `isMarkedNullable` is marked nullable. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21 -> ... }` + * + * @param nondeterministic Optional (defaults to `false`). If true, the created UDF is converted with `asNondeterministic()`. + * @param func The function to convert to a UDF. Can be a lambda.
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF21, +): UserDefinedFunction21 { + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + T16::class.checkForValidType("T16") + T17::class.checkForValidType("T17") + T18::class.checkForValidType("T18") + T19::class.checkForValidType("T19") + T20::class.checkForValidType("T20") + T21::class.checkForValidType("T21") + + return UserDefinedFunction21( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction21]) instance based on the (lambda) function [func], by building it with `udf(name, nondeterministic, func)` and passing the result to `register`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional (defaults to `false`). If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF21, +): NamedUserDefinedFunction21 = + register(udf(name, nondeterministic, func)) + +/** + * Instance of a UDF with 22 arguments.
+ * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call. + * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see NamedUserDefinedFunction22 + * @see udf + */ +open class UserDefinedFunction22( + override val udf: SparkUserDefinedFunction, + override val encoder: Encoder, +): UserDefinedFunction> { + + /** + * Allows this UDF to be called in typed manner using columns in a [Dataset.selectTyped] call. The untyped result is given this UDF's [encoder] and cast to [TypedColumn]. + * @see typedCol to create typed columns. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: TypedColumn, param1: TypedColumn, param2: TypedColumn, param3: TypedColumn, param4: TypedColumn, param5: TypedColumn, param6: TypedColumn, param7: TypedColumn, param8: TypedColumn, param9: TypedColumn, param10: TypedColumn, param11: TypedColumn, param12: TypedColumn, param13: TypedColumn, param14: TypedColumn, param15: TypedColumn, param16: TypedColumn, param17: TypedColumn, param18: TypedColumn, param19: TypedColumn, param20: TypedColumn, param21: TypedColumn): TypedColumn = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20, param21)).`as`(encoder) as TypedColumn + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments.
+ * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column, param19: Column, param20: Column, param21: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20, param21)) + + /** + * Returns an expression that invokes the UDF in untyped manner, using the given arguments. Named (non-operator) counterpart of the untyped [invoke]; both forward to the vararg `invokeUntyped`. + * @see org.apache.spark.sql.expressions.UserDefinedFunction.apply + */ + fun invokeUntyped(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column, param5: Column, param6: Column, param7: Column, param8: Column, param9: Column, param10: Column, param11: Column, param12: Column, param13: Column, param14: Column, param15: Column, param16: Column, param17: Column, param18: Column, param19: Column, param20: Column, param21: Column): Column = invokeUntyped(*arrayOf(param0, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20, param21)) + + /** Returns a [NamedUserDefinedFunction22] with the given [name], built from this UDF's [udf] and [encoder]. */ + override fun withName(name: String): NamedUserDefinedFunction22 = NamedUserDefinedFunction22( + name = name, + udf = udf, + encoder = encoder, + ) + + /** + * Property-delegate accessor: returns the variant of this UDF named after the delegated property (`property.name`). + * @see withName + */ + override fun getValue(thisRef: Any?, property: KProperty<*>): NamedUserDefinedFunction22 = + withName(property.name) +} + +/** + * Instance of a UDF with 22 arguments with name. + * This UDF can be invoked with (typed) columns in a [Dataset.select] or [selectTyped] call.
+ * Alternatively it can be registered for SQL calls using [register]. + * + * @see org.apache.spark.sql.expressions.UserDefinedFunction + * @see UserDefinedFunction22 + * @see udf + */ +class NamedUserDefinedFunction22( + override val name: String, + udf: SparkUserDefinedFunction, + encoder: Encoder, +): NamedUserDefinedFunction>, + UserDefinedFunction22(udf = udf.withName(name), encoder = encoder) + +/** + * Creates a UDF ([NamedUserDefinedFunction22]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction22]) from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Optional. Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = udf(name, nondeterministic, func.get()) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction22]) from a function reference adapting its name by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun UDFRegistration.register( + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction22]) with the given [name] from a reference to a property holding a function. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func reference to a property whose value is the function to wrap + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KProperty0<(T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22) -> R>, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = register(udf(name, func, nondeterministic)) + +/** + * Creates a UDF ([NamedUserDefinedFunction22]) from a function reference, taking the UDF name from the referenced function by reflection. + * For example: `val myUdf = udf(::myFunction)` + * @param func function reference; `func.name` becomes the UDF name + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun udf( + func: KFunction22, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = udf(func.name, func, nondeterministic) + +/** + * Creates a UDF ([NamedUserDefinedFunction22]) with the given [name] from a function reference. + * For example: `val myUdf = udf("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. 
+ * @see udf + */ +inline fun udf( + name: String, + func: KFunction22, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = udf(name, nondeterministic, func) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction22]) from a function reference, taking the UDF name from the referenced function by reflection. + * For example: `val myUdf = udf.register(::myFunction)` + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + func: KFunction22, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = register(udf(func, nondeterministic)) + +/** + * Creates and registers a UDF ([NamedUserDefinedFunction22]) with the given [name] from a function reference. + * For example: `val myUdf = udf.register("myFunction", ::myFunction)` + * @param name Name for the UDF. + * @param func function reference + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @see udf + */ +inline fun UDFRegistration.register( + name: String, + func: KFunction22, + nondeterministic: Boolean = false, +): NamedUserDefinedFunction22 = register(udf(name, func, nondeterministic)) + + +/** Kotlin `fun interface` over Spark's Java `UDF22`, redeclaring `call` with Kotlin types so that nullability is expressed in the type parameters. */ +fun interface UDF22 : org.apache.spark.sql.api.java.UDF22 { override fun call(t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21, t22: T22): R } + +/** + * Defines a named UDF ([NamedUserDefinedFunction22]) instance based on the (lambda) function [func]. + * For example: `val myUdf = udf("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21, t22: T22 -> ... 
}` + * Name can also be supplied using delegate: `val myUdf by udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21, t22: T22 -> ... }` + * @see UserDefinedFunction.getValue + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. + */ +inline fun udf( + name: String, + nondeterministic: Boolean = false, + func: UDF22, +): NamedUserDefinedFunction22 = + udf(nondeterministic, func).withName(name) + +/** + * Defines a UDF ([UserDefinedFunction22]) instance based on the (lambda) function [func]. + * Every parameter type (T1..T22) is passed through `checkForValidType` before the Spark UDF is created. + * For example: `val myUdf = udf { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21, t22: T22 -> ... }` + * + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. 
+ */ +inline fun udf( + nondeterministic: Boolean = false, + func: UDF22, +): UserDefinedFunction22 { + /* Validate every parameter type up front, before any Spark UDF is constructed. */ + T1::class.checkForValidType("T1") + T2::class.checkForValidType("T2") + T3::class.checkForValidType("T3") + T4::class.checkForValidType("T4") + T5::class.checkForValidType("T5") + T6::class.checkForValidType("T6") + T7::class.checkForValidType("T7") + T8::class.checkForValidType("T8") + T9::class.checkForValidType("T9") + T10::class.checkForValidType("T10") + T11::class.checkForValidType("T11") + T12::class.checkForValidType("T12") + T13::class.checkForValidType("T13") + T14::class.checkForValidType("T14") + T15::class.checkForValidType("T15") + T16::class.checkForValidType("T16") + T17::class.checkForValidType("T17") + T18::class.checkForValidType("T18") + T19::class.checkForValidType("T19") + T20::class.checkForValidType("T20") + T21::class.checkForValidType("T21") + T22::class.checkForValidType("T22") + + /* Build the Spark UDF, then apply asNondeterministic() on request and asNonNullable() unless the inspected type is marked nullable. */ + return UserDefinedFunction22( + udf = functions.udf(func, schema(typeOf()).unWrap()) + .let { if (nondeterministic) it.asNondeterministic() else it } + .let { if (typeOf().isMarkedNullable) it else it.asNonNullable() }, + encoder = encoder(), + ) +} + +/** + * Defines and registers a named UDF ([NamedUserDefinedFunction22]) instance based on the (lambda) function [func]. + * The UDF is first created with `udf(name, nondeterministic, func)` and the result is then handed to `register`. + * For example: `val myUdf = udf.register("myUdf") { t1: T1, t2: T2, t3: T3, t4: T4, t5: T5, t6: T6, t7: T7, t8: T8, t9: T9, t10: T10, t11: T11, t12: T12, t13: T13, t14: T14, t15: T15, t16: T16, t17: T17, t18: T18, t19: T19, t20: T20, t21: T21, t22: T22 -> ... }` + * + * @param name The name for this UDF. + * @param nondeterministic Optional. If true, sets the UserDefinedFunction as nondeterministic. + * @param func The function to convert to a UDF. Can be a lambda. 
+ */ +inline fun UDFRegistration.register( + name: String, + nondeterministic: Boolean = false, + func: UDF22, +): NamedUserDefinedFunction22 = + register(udf(name, nondeterministic, func)) + + + + diff --git a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt index 26dcceaf..1cf6b861 100644 --- a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt +++ b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt @@ -29,6 +29,7 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD import org.apache.spark.sql.Dataset import org.apache.spark.sql.functions +import org.apache.spark.sql.functions.col import org.apache.spark.sql.streaming.GroupState import org.apache.spark.sql.streaming.GroupStateTimeout import org.jetbrains.kotlinx.spark.api.tuples.* @@ -47,7 +48,7 @@ class DatasetFunctionTest : ShouldSpec({ val result = dsOf(1, 2, 3, 4, 5) .map { it X (it + 2) } .withCached { - expect(collectAsList()).contains.inAnyOrder.only.values( + expect(collectAsList()).toContain.inAnyOrder.only.values( 1 X 3, 2 X 4, 3 X 5, @@ -56,14 +57,14 @@ class DatasetFunctionTest : ShouldSpec({ ) val next = filter { it._1 % 2 == 0 } - expect(next.collectAsList()).contains.inAnyOrder.only.values(2 X 4, 4 X 6) + expect(next.collectAsList()).toContain.inAnyOrder.only.values(2 X 4, 4 X 6) next } .map { it: Tuple2 -> it + (it._1 + it._2) * 2 } .collectAsList() - expect(result).contains.inOrder.only.values(2 X 4 X 12, 4 X 6 X 20) + expect(result).toContain.inOrder.only.values(2 X 4 X 12, 4 X 6 X 20) } should("handle join operations") { @@ -77,7 +78,7 @@ class DatasetFunctionTest : ShouldSpec({ .leftJoin(second, first.col("id") eq second.col("id")) .map { it._1.id X it._1.name X it._2?.value } .collectAsList() - expect(result).contains.inOrder.only.values(t(1, "a", 100), 
t(2, "b", null)) + expect(result).toContain.inOrder.only.values(t(1, "a", 100), t(2, "b", null)) } should("handle map operations") { @@ -86,7 +87,7 @@ class DatasetFunctionTest : ShouldSpec({ .map { it + 4 } .filter { it < 10 } .collectAsList() - expect(result).contains.inAnyOrder.only.values(5, 6, 7, 8, 7, 8, 9) + expect(result).toContain.inAnyOrder.only.values(5, 6, 7, 8, 7, 8, 9) } should("Allow simple forEachPartition in datasets") { @@ -341,25 +342,25 @@ class DatasetFunctionTest : ShouldSpec({ SomeClass(intArrayOf(1, 2, 4), 5), ) - val newDS1WithAs: Dataset = dataset.selectTyped( - functions.col("a").`as`(), + val newDS1WithAs: Dataset = dataset.select( + col("a").typed(), ) newDS1WithAs.collectAsList() - val newDS2: Dataset> = dataset.selectTyped( + val newDS2: Dataset> = dataset.select( col(SomeClass::a), // NOTE: this only works on 3.0, returning a data class with an array in it col(SomeClass::b), ) newDS2.collectAsList() - val newDS3: Dataset> = dataset.selectTyped( + val newDS3: Dataset> = dataset.select( col(SomeClass::a), col(SomeClass::b), col(SomeClass::b), ) newDS3.collectAsList() - val newDS4: Dataset> = dataset.selectTyped( + val newDS4: Dataset> = dataset.select( col(SomeClass::a), col(SomeClass::b), col(SomeClass::b), @@ -367,7 +368,7 @@ class DatasetFunctionTest : ShouldSpec({ ) newDS4.collectAsList() - val newDS5: Dataset> = dataset.selectTyped( + val newDS5: Dataset> = dataset.select( col(SomeClass::a), col(SomeClass::b), col(SomeClass::b), @@ -440,7 +441,7 @@ class DatasetFunctionTest : ShouldSpec({ ) dataset.collectAsList() - val column = functions.col("b").`as`() + val column = dataset.col<_, IntArray>("b") val b = dataset.where(column gt 3 and col(SomeOtherClass::c)) diff --git a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt deleted file mode 100644 index df3525ef..00000000 --- 
a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt +++ /dev/null @@ -1,167 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API: API for Spark 2.4+ (Scala 2.12) - * ---------- - * Copyright (C) 2019 - 2021 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * =LICENSEEND= - */ -package org.jetbrains.kotlinx.spark.api - -import io.kotest.core.spec.style.ShouldSpec -import io.kotest.matchers.shouldBe -import org.apache.spark.sql.Dataset -import org.junit.jupiter.api.assertThrows -import scala.collection.JavaConverters -import scala.collection.mutable.WrappedArray - -@Suppress("unused") -private fun scala.collection.Iterable.asIterable(): Iterable = JavaConverters.asJavaIterable(this) - -@Suppress("unused") -class UDFRegisterTest : ShouldSpec({ - context("org.jetbrains.kotlinx.spark.api.UDFRegister") { - context("the function checkForValidType") { - val invalidTypes = listOf( - Array::class, - Iterable::class, - List::class, - MutableList::class, - ByteArray::class, - CharArray::class, - ShortArray::class, - IntArray::class, - LongArray::class, - FloatArray::class, - DoubleArray::class, - BooleanArray::class, - Map::class, - MutableMap::class, - Set::class, - MutableSet::class, - arrayOf("")::class, - listOf("")::class, - setOf("")::class, - mapOf("" to "")::class, - mutableListOf("")::class, - mutableSetOf("")::class, - mutableMapOf("" to "")::class, - ) - invalidTypes.forEachIndexed { index, invalidType -> 
- should("$index: throw an ${TypeOfUDFParameterNotSupportedException::class.simpleName} when encountering ${invalidType.qualifiedName}") { - assertThrows { - invalidType.checkForValidType("test") - } - } - } - } - - context("the register-function") { - withSpark { - - should("fail when using a simple kotlin.Array") { - assertThrows { - udf.register("shouldFail") { array: Array -> - array.joinToString(" ") - } - } - } - - should("succeed when using a WrappedArray") { - udf.register("shouldSucceed") { array: WrappedArray -> - array.asIterable().joinToString(" ") - } - } - - should("succeed when return a List") { - udf.register>("StringToIntList") { a -> - a.asIterable().map { it.code } - } - - val result = spark.sql("select StringToIntList('ab')").`as`>().collectAsList() - result shouldBe listOf(listOf(97, 98)) - } - - should("succeed when using three type udf and as result to udf return type") { - listOf("a" to 1, "b" to 2).toDS().toDF().createOrReplaceTempView("test1") - udf.register("stringIntDiff") { a, b -> - a[0].code - b - } - val result = spark.sql("select stringIntDiff(first, second) from test1").`as`().collectAsList() - result shouldBe listOf(96, 96) - } - } - } - - context("calling the UDF-Wrapper") { - withSpark(logLevel = SparkLogLevel.DEBUG) { - should("succeed call UDF-Wrapper in withColumn") { - - val stringArrayMerger = udf.register, String>("stringArrayMerger") { - it.asIterable().joinToString(" ") - } - - val testData = dsOf(listOf("a", "b")) - val newData = testData.withColumn("text", stringArrayMerger(testData.col("value"))) - - newData.select("text").collectAsList().zip(newData.select("value").collectAsList()) - .forEach { (text, textArray) -> - assert(text.getString(0) == textArray.getList(0).joinToString(" ")) - } - } - - - should("succeed in dataset") { - val dataset: Dataset = listOf( - NormalClass(name = "a", age = 10), - NormalClass(name = "b", age = 20) - ).toDS() - - val udfWrapper = udf.register("nameConcatAge") { name, age -> - 
"$name-$age" - } - - val collectAsList = dataset.withColumn( - "nameAndAge", - udfWrapper(dataset.col("name"), dataset.col("age")) - ) - .select("nameAndAge") - .collectAsList() - - collectAsList[0][0] shouldBe "a-10" - collectAsList[1][0] shouldBe "b-20" - } - } - } - - context("udf return data class") { - withSpark(logLevel = SparkLogLevel.DEBUG) { - should("return NormalClass") { - listOf("a" to 1, "b" to 2).toDS().toDF().createOrReplaceTempView("test2") - udf.register("toNormalClass") { a, b -> - NormalClass(b, a) - } - spark.sql("select toNormalClass(first, second) from test2").show() - } - } - } - - } -}) - - -data class NormalClass( - val age: Int, - val name: String -) diff --git a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt new file mode 100644 index 00000000..a4848d40 --- /dev/null +++ b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFTest.kt @@ -0,0 +1,1326 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 2.4+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2021 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * =LICENSEEND= + */ +@file:Suppress("SqlNoDataSourceInspection") + +package org.jetbrains.kotlinx.spark.api + +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.core.spec.style.ShouldSpec +import io.kotest.matchers.should +import io.kotest.matchers.shouldBe +import io.kotest.matchers.shouldNotBe +import io.kotest.matchers.types.beOfType +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.Dataset +import org.apache.spark.sql.Encoder +import org.apache.spark.sql.Row +import org.apache.spark.sql.expressions.Aggregator +import org.intellij.lang.annotations.Language +import org.junit.jupiter.api.assertThrows +import scala.collection.Seq +import scala.collection.mutable.WrappedArray +import java.io.Serializable +import kotlin.random.Random + +@Suppress("unused") +class UDFTest : ShouldSpec({ + + context("UDF tests") { + context("the function checkForValidType") { + val invalidTypes = listOf( + Char::class, + Array::class, + Iterable::class, + List::class, + MutableList::class, + ByteArray::class, + CharArray::class, + ShortArray::class, + IntArray::class, + LongArray::class, + FloatArray::class, + DoubleArray::class, + BooleanArray::class, + Map::class, + MutableMap::class, + Set::class, + MutableSet::class, + arrayOf("")::class, + listOf("")::class, + setOf("")::class, + mapOf("" to "")::class, + mutableListOf("")::class, + mutableSetOf("")::class, + mutableMapOf("" to "")::class, + ) + invalidTypes.forEachIndexed { index, invalidType -> + should("$index: throw an ${TypeOfUDFParameterNotSupportedException::class.simpleName} when encountering ${invalidType.qualifiedName}") { + assertThrows { + invalidType.checkForValidType("test") + } + } + } + } + + context("Test all possible notations") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + should("Support official spark notation") { + + val random = udf(nondeterministic = true) { -> Math.random() } + spark.udf().register("random", random) + spark.sql("SELECT random()").show() + 
+ + val plusOne = udf { x: Int -> x + 1 } + spark.udf().register("plusOne", plusOne) + spark.sql("SELECT plusOne(5)").show() + + spark.udf().register("strLenKotlin") { str: String, int: Int -> str.length + int } + spark.sql("SELECT strLenKotlin('test', 1)").show() + + spark.udf().register("oneArgFilter") { n: Long -> n > 5 } + spark.range(1, 10).createOrReplaceTempView("test") + spark.sql("SELECT * FROM test WHERE oneArgFilter(id)").show() + } + + should("Have named UDFs which do not require a name for registering") { + val plusOne = udf("plusOne1") { x: Int -> x + 1 } + udf.register(plusOne) + spark.sql("SELECT plusOne1(5)").show() + } + + should("Have named UDFs which do not require a name for registering (other register option)") { + val plusOne = udf("plusOne2") { x: Int -> x + 1 } + plusOne.register() + spark.sql("SELECT plusOne2(5)").show() + } + + should("Allow named udf to register with different name") { + val plusOne = udf("plusOne") { x: Int -> x + 1 } + plusOne.name shouldBe "plusOne" + + val plusOne3 = udf.register("plusOne3", plusOne) + plusOne3.name shouldBe "plusOne3" + + spark.sql("SELECT plusOne3(5)").show() + } + + should("Allow named udf to register with different name (other register option)") { + val plusOne = udf("plusOne") { x: Int -> x + 1 } + plusOne.name shouldBe "plusOne" + + val plusOne3 = plusOne.register("plusOne3a") + plusOne3.name shouldBe "plusOne3a" + + spark.sql("SELECT plusOne3a(5)").show() + } + + should("Only register newest name") { + fun plusOne(x: Int) = x + 1 + + val plusOneUdf = udf(::plusOne) + plusOneUdf.name shouldBe "plusOne" + + val plusOne0 = udf("plusOne0", ::plusOne) + plusOne0.name shouldBe "plusOne0" + + val plusOne1 = plusOne0.withName("plusOne1") + plusOne1.name shouldBe "plusOne1" + + val plusOne2 by plusOne1 + plusOne2.name shouldBe "plusOne2" + + val plusOne3 by plusOne2 + plusOne3.name shouldBe "plusOne3" + + val plusOne4 = udf.register("plusOne4", plusOne3) + plusOne4.name shouldBe "plusOne4" + + val 
plusOne5 = plusOne4.register("plusOne5") + plusOne5.name shouldBe "plusOne5" + + spark.sql("SELECT plusOne4(5)").show() + spark.sql("SELECT plusOne5(5)").show() + } + + should("Allow udf to be registered from function ref") { + fun addTwo(x: Int, y: Int) = x + y + + val addTwo = udf.register(::addTwo) + addTwo.name shouldBe "addTwo" + + val addTwo1 = udf.register("addTwo1", ::addTwo) + addTwo1.name shouldBe "addTwo1" + + spark.sql("SELECT addTwo(1, 2)") + spark.sql("SELECT addTwo1(1, 2)") + + } + + should("Allow udf to be registered from property function ref") { + val addTwo = udf.register(::addTwoConst) + addTwo.name shouldBe "addTwoConst" + + val addTwo1 = udf.register("addTwoConst1", ::addTwoConst) + addTwo1.name shouldBe "addTwoConst1" + + spark.sql("SELECT addTwoConst(1, 2)") + spark.sql("SELECT addTwoConst1(1, 2)") + } + } + } + + context("the register-function") { + withSpark { + + should("fail when using a simple kotlin.Array") { + assertThrows { + udf.register("shouldFail", func = { array: Array -> + array.joinToString(" ") + }) + } + + assertThrows { + udf(func = { array: Array -> + array.joinToString(" ") + }) + } + } + + should("succeed when using a WrappedArray") { + udf.register("shouldSucceed") { array: WrappedArray -> + array.asKotlinIterable().joinToString(" ") + } + } + + should("succeed when using a Seq") { + udf.register("shouldSucceed") { array: Seq -> + array.asKotlinIterable().joinToString(" ") + } + } + + should("succeed when return a List") { + udf.register("StringToIntList") { a: String -> + a.asIterable().map { it.code } + } + + @Language("SQL") + val result = spark.sql("SELECT StringToIntList('ab')").to>().collectAsList() + result shouldBe listOf(listOf(97, 98)) + } + + should("succeed when using three type udf and as result to udf return type") { + listOf("a" to 1, "b" to 2).toDS().toDF().createOrReplaceTempView("test1") + + fun stringIntDiff(a: String, b: Int) = a[0].code - b + udf.register(::stringIntDiff) + + @Language("SQL") + val 
result = spark.sql("SELECT stringIntDiff(first, second) FROM test1").to().collectAsList() + result shouldBe listOf(96, 96) + } + } + } + + context("calling the UDF-Wrapper") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + should("succeed in withColumn") { + + val stringArrayMerger = udf { it: WrappedArray -> + it.asKotlinIterable().joinToString(" ") + } + + val testData = dsOf(arrayOf("a", "b")) + val newData = testData.withColumn( + "text", + stringArrayMerger( + testData.singleCol().asSeq() + ), + ) + + (newData.select("text").collectAsList() zip newData.select("value").collectAsList()) + .forEach { (text, textArray) -> + assert(text.getString(0) == textArray.getList(0).joinToString(" ")) + } + } + + should("succeed in withColumn using Seq") { + + val stringArrayMerger = udf { it: Seq -> + it.asKotlinIterable().joinToString(" ") + } + + val testData = dsOf(arrayOf("a", "b")) + val newData = testData.withColumn( + "text", + stringArrayMerger( + testData.singleCol().asSeq() + ), + ) + + (newData.select("text").collectAsList() zip newData.select("value").collectAsList()) + .forEach { (text, textArray) -> + assert(text.getString(0) == textArray.getList(0).joinToString(" ")) + } + } + + should("succeed in dataset") { + val dataset = listOf( + NormalClass(name = "a", age = 10), + NormalClass(name = "b", age = 20), + ).toDS() + + val nameConcatAge by udf { name: String, age: Int -> + "$name-$age" + } + + val ds = dataset.select( + nameConcatAge( + col(NormalClass::name), + col(NormalClass::age), + ) + ) + ds should beOfType>() + + "${nameConcatAge.name}(${NormalClass::name.name}, ${NormalClass::age.name})" shouldBe ds.columns().single() + + val collectAsList = ds.collectAsList() + collectAsList[0] shouldBe "a-10" + collectAsList[1] shouldBe "b-20" + } + + should("Return Dataset if types are not adhered to") { + val dataset = listOf( + NormalClass(name = "a", age = 10), + NormalClass(name = "b", age = 20), + ).toDS() + + val nameConcatAge by udf { name: String, age: 
Int -> + "$name-$age" + } + + val ds = dataset.select( + nameConcatAge( + col(NormalClass::name), + col(NormalClass::age).typed<_, Int?>(), + ) + ) + ds should beOfType>() + + "${nameConcatAge.name}(${NormalClass::name.name}, ${NormalClass::age.name})" shouldBe ds.columns().single() + + val collectAsList = ds.collectAsList() + collectAsList[0].getAs(0) shouldBe "a-10" + collectAsList[1].getAs(0) shouldBe "b-20" + } + + should("Return Dataset if using invokeUntyped") { + val dataset = listOf( + NormalClass(name = "a", age = 10), + NormalClass(name = "b", age = 20), + ).toDS() + + val nameConcatAge by udf { name: String, age: Int -> + "$name-$age" + } + + val ds = dataset.select( + nameConcatAge.invokeUntyped( + col(NormalClass::name), + col(NormalClass::age), + ) + ) + ds should beOfType>() + + "${nameConcatAge.name}(${NormalClass::name.name}, ${NormalClass::age.name})" shouldBe ds.columns().single() + + val collectAsList = ds.collectAsList() + collectAsList[0].getAs(0) shouldBe "a-10" + collectAsList[1].getAs(0) shouldBe "b-20" + } + } + } + + context("non deterministic") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + should("allow udfs to be non deterministic") { + + udf.register("random", nondeterministic = true) { -> + Random.nextInt() + } + + val a = spark.sql("SELECT random()") + .select(col<_, Int>("random()")) + .takeAsList(1) + .single() + val b = spark.sql("SELECT random()") + .select(col<_, Int>("random()")) + .takeAsList(1) + .single() + + a shouldNotBe b + } + } + } + + context("non nullable") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + + should("allow udfs to be non nullable") { + udf.register("test") { -> + null + } + + spark.sql("SELECT test()") + .select(col<_, Int?>("test()")) + .showDS() + .takeAsList(1) + .single() + + } + + should("allow udfs to be non nullable using delegate") { + val test by udf { -> null } + + // access it once + test.register() + + spark.sql("SELECT test()") + .select(col<_, Int?>("test()")) + .showDS() + 
.takeAsList(1) + .single() + + } + } + } + + context("udf return data class") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + should("return NormalClass") { + listOf("a" to 1, "b" to 2).toDS().toDF().createOrReplaceTempView("test2") + + udf.register("toNormalClass") { name: String, age: Int -> + NormalClass(age, name) + } + spark.sql("select toNormalClass(first, second) from test2").show() + } + + should("not return NormalClass when not registered") { + listOf(1 to "a", 2 to "b").toDS().toDF().createOrReplaceTempView("test2") + + val toNormalClass2 = udf("toNormalClass2", ::NormalClass) + + shouldThrow { + spark.sql("select toNormalClass2(first, second) from test2").show() + } + } + + should("return NormalClass using accessed by delegate") { + listOf(1 to "a", 2 to "b").toDS().toDF().createOrReplaceTempView("test2") + val toNormalClass3 = udf("toNormalClass3", ::NormalClass) + toNormalClass3.register() + + spark.sql("select toNormalClass3(first, second) from test2").show() + } + } + } + + } + + context("UDAF tests") { + + context("Test all notations") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + + should("Support Spark notation") { + val ds = dsOf( + Employee("Michael", 3000), + Employee("Andy", 4500), + Employee("Justin", 3500), + Employee("Berta", 4000), + ) + + // Convert the function to a `TypedColumn` and give it a name + val averageSalary = MyAverage.toColumn().name("average_salary") + val result = ds.select(averageSalary) + + result.collectAsList().single() shouldBe 3750.0 + } + + should("Support all udaf creation methods") { + val a = udaf(MyAverage) + a.name shouldBe "MyAverage" + + val b = udaf("myAverage", MyAverage) + b.name shouldBe "myAverage" + + val c = udafUnnamed(MyAverage) + c should beOfType>() + + val d = udaf(aggregator) + d.name shouldBe "Aggregator" + + val e = object : Aggregator(), Serializable { + override fun zero() = Average(0L, 0L) + override fun reduce(buffer: Average, it: Long) = + buffer.apply { sum += it; count += 1 } + + 
override fun merge(buffer: Average, it: Average) = + buffer.apply { sum += it.sum; count += it.count } + + override fun finish(it: Average) = it.sum.toDouble() / it.count + override fun bufferEncoder() = encoder() + override fun outputEncoder() = encoder() + } + + shouldThrow { + // cannot get name of an unnamed object + udaf(e) + } + // should use instead + udafUnnamed(e) + // or + udaf("someName", e) + + + val f = udaf( + zero = { Average(0L, 0L) }, + reduce = applyFun { + sum += it + count += 1 + }, + merge = applyFun { + sum += it.sum + count += it.count + }, + finish = { it.sum.toDouble() / it.count }, + nondeterministic = false, + ) + f should beOfType>() + + val g = udaf( + name = "g", + zero = { Average(0L, 0L) }, + reduce = applyFun { + sum += it + count += 1 + }, + merge = applyFun { + sum += it.sum + count += it.count + }, + finish = { it.sum.toDouble() / it.count }, + nondeterministic = false, + ) + g.name shouldBe "g" + } + } + } + + context("Registering") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + + val ds = dsOf( + Employee("Michael", 3000), + Employee("Andy", 4500), + Employee("Justin", 3500), + Employee("Berta", 4000), + ) + ds.createOrReplaceTempView("employees") + + should("Support registering udafs from Aggregator") { + val a = udaf("myAverage", aggregator).register() + a.name shouldBe "myAverage" + + @Language("SQL") + val result = spark.sql("SELECT myAverage(salary) as average_salary FROM employees") + .to() + result.collectAsList().single() shouldBe 3750.0 + } + + should("Support registering udafs from Aggregator alternative") { + val a = udf.register("myAverage0", aggregator) + a.name shouldBe "myAverage0" + + @Language("SQL") + val result = spark.sql("SELECT myAverage0(salary) as average_salary FROM employees") + .to() + result.collectAsList().single() shouldBe 3750.0 + } + + should("Support registering udaf in place") { + val a = udf.register( + name = "myAverage1", + zero = { Average(0L, 0L) }, + reduce = applyFun { + sum += it + 
count += 1 + }, + merge = applyFun { + sum += it.sum + count += it.count + }, + finish = { it.sum.toDouble() / it.count }, + ) + a.name shouldBe "myAverage1" + + @Language("SQL") + val result = spark.sql("SELECT myAverage1(salary) as average_salary FROM employees") + .to() + result.collectAsList().single() shouldBe 3750.0 + } + } + } + + context("Dataset select") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + val ds = dsOf( + Employee("Michael", 3000), + Employee("Andy", 4500), + Employee("Justin", 3500), + Employee("Berta", 4000), + ) + + should("Allow unnamed UDAFs to work with datasets") { + val myAverage = udafUnnamed( + object : Aggregator(), Serializable { + override fun zero() = Average(0L, 0L) + override fun reduce(buffer: Average, it: Long) = + buffer.apply { sum += it; count += 1 } + + override fun merge(buffer: Average, it: Average) = + buffer.apply { sum += it.sum; count += it.count } + + override fun finish(it: Average) = it.sum.toDouble() / it.count + override fun bufferEncoder() = encoder() + override fun outputEncoder() = encoder() + } + ) + + val result = ds.select( + myAverage(col(Employee::salary)) + ).showDS() + + "(${Employee::salary.name})" shouldBe result.columns().single() + result should beOfType>() + result.collectAsList().single() shouldBe 3750.0 + } + + should("Allow named UDAFs to work with datasets") { + val myAverage = udaf(aggregator) + + val result = ds.select( + myAverage(col(Employee::salary)) + ).showDS() + + "${myAverage.name.lowercase()}(${Employee::salary.name})" shouldBe result.columns().single() + result should beOfType>() + result.collectAsList().single() shouldBe 3750.0 + } + + + + } + } + + } + + context("vararg UDF tests") { + fun firstByte(vararg a: Byte) = a.firstOrNull() + fun firstShort(vararg a: Short) = a.firstOrNull() + fun firstInt(vararg a: Int) = a.firstOrNull() + fun firstLong(vararg a: Long) = a.firstOrNull() + fun firstFloat(vararg a: Float) = a.firstOrNull() + fun firstDouble(vararg a: Double) = 
a.firstOrNull() + fun firstBoolean(vararg a: Boolean) = a.firstOrNull() + fun firstString(vararg a: String) = a.firstOrNull() + + + context("Creating Vararg UDF") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + + should("Create Byte vararg udf") { + udf(::firstByte).let { + it should beOfType>() + it.name shouldBe "firstByte" + } + udf("test", ::firstByte).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstByteVal).let { + it should beOfType>() + it.name shouldBe "firstByteVal" + } + udf("test", ::firstByteVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: ByteArray -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: ByteArray -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + + should("Create Short vararg udf") { + udf(::firstShort).let { + it should beOfType>() + it.name shouldBe "firstShort" + } + udf("test", ::firstShort).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstShortVal).let { + it should beOfType>() + it.name shouldBe "firstShortVal" + } + udf("test", ::firstShortVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: ShortArray -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: ShortArray -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + + should("Create Int vararg udf") { + udf(::firstInt).let { + it should beOfType>() + it.name shouldBe "firstInt" + } + udf("test", ::firstInt).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstIntVal).let { + it should beOfType>() + it.name shouldBe "firstIntVal" + } + udf("test", ::firstIntVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: IntArray -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: IntArray -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + + should("Create Long vararg udf") { + 
udf(::firstLong).let { + it should beOfType>() + it.name shouldBe "firstLong" + } + udf("test", ::firstLong).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstLongVal).let { + it should beOfType>() + it.name shouldBe "firstLongVal" + } + udf("test", ::firstLongVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: LongArray -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: LongArray -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + + should("Create Float vararg udf") { + udf(::firstFloat).let { + it should beOfType>() + it.name shouldBe "firstFloat" + } + udf("test", ::firstFloat).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstFloatVal).let { + it should beOfType>() + it.name shouldBe "firstFloatVal" + } + udf("test", ::firstFloatVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: FloatArray -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: FloatArray -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + + should("Create Double vararg udf") { + udf(::firstDouble).let { + it should beOfType>() + it.name shouldBe "firstDouble" + } + udf("test", ::firstDouble).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstDoubleVal).let { + it should beOfType>() + it.name shouldBe "firstDoubleVal" + } + udf("test", ::firstDoubleVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: DoubleArray -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: DoubleArray -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + + should("Create Boolean vararg udf") { + udf(::firstBoolean).let { + it should beOfType>() + it.name shouldBe "firstBoolean" + } + udf("test", ::firstBoolean).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstBooleanVal).let { + it should 
beOfType>() + it.name shouldBe "firstBooleanVal" + } + udf("test", ::firstBooleanVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: BooleanArray -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: BooleanArray -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + + should("Create Any vararg udf") { + udf(::firstString).let { + it should beOfType>() + it.name shouldBe "firstString" + } + udf("test", ::firstString).let { + it should beOfType>() + it.name shouldBe "test" + } + udf(::firstStringVal).let { + it should beOfType>() + it.name shouldBe "firstStringVal" + } + udf("test", ::firstStringVal).let { + it should beOfType>() + it.name shouldBe "test" + } + udf { a: Array -> a.firstOrNull() }.let { + it should beOfType>() + } + udf("test") { a: Array -> a.firstOrNull() }.let { + it should beOfType>() + it.name shouldBe "test" + } + } + } + } + + context("Call vararg udf from sql") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + should("with Bytes") { + val value = 1.toByte() + udf.register(::firstByte) + + spark.sql("select firstByte()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstByte(" + values.joinToString() + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstByte(" + values.joinToString() + ")") + } + } + + should("with Shorts") { + val value = 1.toShort() + udf.register(::firstShort) + + spark.sql("select firstShort()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstShort(" + values.joinToString() + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstShort(" + values.joinToString() + ")") + } 
+ } + + should("with Ints") { + val value = 1 + udf.register(::firstInt) + + spark.sql("select firstInt()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstInt(" + values.joinToString() + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstInt(" + values.joinToString() + ")") + } + } + + should("with Longs") { + val value = 1L + udf.register(::firstLong) + + spark.sql("select firstLong()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstLong(" + values.joinToString() + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstLong(" + values.joinToString() + ")") + } + } + + should("with Floats") { + val value = 1f + udf.register(::firstFloat) + + spark.sql("select firstFloat()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstFloat(" + values.joinToString() + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstFloat(" + values.joinToString() + ")") + } + } + + should("with Doubles") { + val value = 1.0 + udf.register(::firstDouble) + + spark.sql("select firstDouble()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstDouble(" + values.joinToString() + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstDouble(" + values.joinToString() + ")") + } + } + + should("with Booleans") { + val value = true + 
udf.register(::firstBoolean) + + spark.sql("select firstBoolean()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstBoolean(" + values.joinToString() + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstBoolean(" + values.joinToString() + ")") + } + } + + should("with Anys") { + val value = "test" + udf.register(::firstString) + + spark.sql("select firstString()") + .collectAsList() + .single() + .getAs(0) shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { value } + spark.sql("select firstString(" + values.joinToString { "\"$it\"" } + ")") + .collectAsList() + .single() + .getAs(0) shouldBe value + } + + val values = (1..23).map { value } + shouldThrow { + spark.sql("select firstString(" + values.joinToString { "\"$it\"" } + ")") + } + } + + + } + } + + context("Call vararg udf from dataset select") { + withSpark(logLevel = SparkLogLevel.DEBUG) { + should("with Bytes") { + val value = 1.toByte() + val ds = dsOf(value) + + val firstByte = udf.register(::firstByte) + + ds.select(firstByte()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstByte(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + shouldThrow { + ds.select(firstByte(*values)) + } + } + + should("with Shorts") { + val value = 1.toShort() + val ds = dsOf(value) + + val firstShort = udf.register(::firstShort) + + ds.select(firstShort()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstShort(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + 
shouldThrow { + ds.select(firstShort(*values)) + } + } + + should("with Ints") { + val value = 1 + val ds = dsOf(value) + + val firstInt = udf.register(::firstInt) + + ds.select(firstInt()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstInt(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + shouldThrow { + ds.select(firstInt(*values)) + } + } + + should("with Longs") { + val value = 1L + val ds = dsOf(value) + + val firstLong = udf.register(::firstLong) + + ds.select(firstLong()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstLong(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + shouldThrow { + ds.select(firstLong(*values)) + } + } + + should("with Floats") { + val value = 1f + val ds = dsOf(value) + + val firstFloat = udf.register(::firstFloat) + + ds.select(firstFloat()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstFloat(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + shouldThrow { + ds.select(firstFloat(*values)) + } + } + + should("with Doubles") { + val value = 1.0 + val ds = dsOf(value) + + val firstDouble = udf.register(::firstDouble) + + ds.select(firstDouble()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstDouble(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + shouldThrow { + ds.select(firstDouble(*values)) + } + } + + should("with 
Booleans") { + val value = true + val ds = dsOf(value) + + val firstBoolean = udf.register(::firstBoolean) + + ds.select(firstBoolean()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstBoolean(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + shouldThrow { + ds.select(firstBoolean(*values)) + } + } + + should("with Anys") { + val value = "test" + val ds = dsOf(value) + + val firstString = udf.register(::firstString) + + ds.select(firstString()) + .collectAsList() + .single() shouldBe null + + (1..22).forEach { nr -> + val values = (1..nr).map { ds.singleCol() }.toTypedArray() + + ds.select(firstString(*values)) + .collectAsList() + .single() shouldBe value + } + + val values = (1..23).map { ds.singleCol() }.toTypedArray() + shouldThrow { + ds.select(firstString(*values)) + } + } + } + } + + } +}) + +data class Employee(val name: String, val salary: Long) +data class Average(var sum: Long, var count: Long) + +private object MyAverage : Aggregator() { + // A zero value for this aggregation. Should satisfy the property that any b + zero = b + + override fun zero(): Average = Average(0L, 0L) + + // Combine two values to produce a new value. 
For performance, the function may modify `buffer` + // and return it instead of constructing a new object + override fun reduce(buffer: Average, employee: Employee): Average { + buffer.sum += employee.salary + buffer.count += 1L + return buffer + } + + // Merge two intermediate values + override fun merge(b1: Average, b2: Average): Average { + b1.sum += b2.sum + b1.count += b2.count + return b1 + } + + // Transform the output of the reduction + override fun finish(reduction: Average): Double = reduction.sum.toDouble() / reduction.count + + // Specifies the Encoder for the intermediate value type + override fun bufferEncoder(): Encoder<Average> = encoder() + + // Specifies the Encoder for the final output value type + override fun outputEncoder(): Encoder<Double> = encoder() + +} + +// Small but fun helpers I couldn't help but leave somewhere in the code. +// extensionFun lets a 2-argument lambda be written as a receiver lambda (1st argument is `this`, 2nd is `it`); applyFun does the same but returns the receiver after running the block. +fun <S, T, U> extensionFun(block: S.(T) -> U): S.(T) -> U = block +fun <S, T> applyFun(block: S.(T) -> Unit): S.(T) -> S = extensionFun { block(it); this } + +private val aggregator = aggregatorOf( + zero = { Average(0L, 0L) }, + reduce = applyFun { + sum += it + count += 1 + }, + merge = applyFun { + sum += it.sum + count += it.count + }, + finish = { it.sum.toDouble() / it.count }, +) + + +private val addTwoConst = { x: Int, y: Int -> x + y } + +data class NormalClass( + val age: Int, + val name: String +) + +private val firstByteVal = { a: ByteArray -> a.firstOrNull() } +private val firstShortVal = { a: ShortArray -> a.firstOrNull() } +private val firstIntVal = { a: IntArray -> a.firstOrNull() } +private val firstLongVal = { a: LongArray -> a.firstOrNull() } +private val firstFloatVal = { a: FloatArray -> a.firstOrNull() } +private val firstDoubleVal = { a: DoubleArray -> a.firstOrNull() } +private val firstBooleanVal = { a: BooleanArray -> a.firstOrNull() } +private val firstStringVal = { a: Array<String> -> a.firstOrNull() } \ No newline at end of file diff --git a/pom.xml b/pom.xml
index 1b51d8d4..6e08e4d2 100644 --- a/pom.xml +++ b/pom.xml @@ -15,9 +15,9 @@ 3.1.0 3.3.1 1.3.1 - 5.2.3 + 5.3.1 0.11.0-95 - 1.6.21 + 1.7.0 0.7.5 3.2.1