diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 63d49c25..98b033b4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -26,11 +26,11 @@ jobs:
           restore-keys: ${{ runner.os }}-m2
       - name: Build with Maven
         run: ./mvnw -B package --file pom.xml -Pscala-2.12 -Dkotest.tags="!Kafka"
-  qodana:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - name: 'Qodana Scan'
-        uses: JetBrains/qodana-action@v5.0.2
+#  qodana:
+#    runs-on: ubuntu-latest
+#    steps:
+#      - uses: actions/checkout@v3
+#      - name: 'Qodana Scan'
+#        uses: JetBrains/qodana-action@v5.0.2
 
 # vim: ts=2:sts=2:sw=2:expandtab
diff --git a/.github/workflows/publish_dev_version.yml b/.github/workflows/publish_dev_version.yml
new file mode 100644
index 00000000..80266c62
--- /dev/null
+++ b/.github/workflows/publish_dev_version.yml
@@ -0,0 +1,31 @@
+name: Generate and publish docs
+
+on:
+  push:
+    branches:
+      - "spark-3.2"
+
+jobs:
+  build-and-deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK 11
+        uses: actions/setup-java@v2
+        with:
+          distribution: adopt
+          java-version: 11
+          check-latest: true
+      - name: Cache Maven packages
+        uses: actions/cache@v2
+        with:
+          path: ~/.m2
+          key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+          restore-keys: ${{ runner.os }}-m2
+      - name: Deploy to GH Packages
+        run: ./mvnw --batch-mode deploy -Dkotest.tags="!Kafka"
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+
diff --git a/README.md b/README.md
index bd227403..9aa51579 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,32 @@ Once you have configured the dependency, you only need to add the following import
 
 ```kotlin
 import org.jetbrains.kotlinx.spark.api.*
 ```
+### Jupyter
+
+The Kotlin Spark API also supports Kotlin Jupyter notebooks.
+To use it, simply add
+
+```jupyterpython
+%use kotlin-spark-api
+```
+to the top of your notebook. This will get the latest version of the API, together with the latest version of Spark.
+To use a specific version of Spark or of the API itself, add it like this:
+```jupyterpython
+%use kotlin-spark-api(spark=3.2, version=1.0.4)
+```
+
+Inside the notebook, a Spark session is started automatically and can be accessed via the `spark` value.
+`sc: JavaSparkContext` can also be accessed directly. Otherwise, the API works the same as in a regular Kotlin project.
+
+There is also support for HTML rendering of Datasets and simple (Java)RDDs.
+
+To use the Spark Streaming capabilities, instead use
+```jupyterpython
+%use kotlin-spark-api-streaming
+```
+This does not start a Spark session right away, so you can call `withSparkStreaming(batchDuration) {}`
+in whichever cell you want.
+
 ## Kotlin for Apache Spark features
 
 ### Creating a SparkSession in Kotlin
@@ -81,12 +107,13 @@
 ```kotlin
 val spark = SparkSession
     .builder()
     .master("local[2]")
     .appName("Simple Application").orCreate
-
 ```
+This is not needed when running the Kotlin Spark API from a Jupyter notebook.
+
 ### Creating a Dataset in Kotlin
 
 ```kotlin
-spark.toDS("a" to 1, "b" to 2)
+spark.dsOf("a" to 1, "b" to 2)
 ```
 The example above produces `Dataset<Pair<String, Int>>`. While Kotlin Pairs and Triples are supported, Scala Tuples are recommended for better support.
@@ -102,6 +129,8 @@ We provide you with useful function `withSpark`, which accepts everything that may be needed to run Spark
 
 After work block ends, `spark.stop()` is called automatically.
 
+Do not use this when running the Kotlin Spark API from a Jupyter notebook.
+ ```kotlin withSpark { dsOf(1, 2) diff --git a/core/3.2/pom_2.12.xml b/core/3.2/pom_2.12.xml index 8ed1eb4f..5fed5559 100644 --- a/core/3.2/pom_2.12.xml +++ b/core/3.2/pom_2.12.xml @@ -1,75 +1,81 @@ - - 4.0.0 + + 4.0.0 - Kotlin Spark API: Scala core for Spark 3.2+ (Scala 2.12) - Scala-Spark 3.2+ compatibility layer for Kotlin for Apache Spark - core-3.2_2.12 - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.12 - 1.0.4-SNAPSHOT - ../../pom_2.12.xml - + Kotlin Spark API: Scala core for Spark 3.2+ (Scala 2.12) + Scala-Spark 3.2+ compatibility layer for Kotlin for Apache Spark + core-3.2_2.12 + + org.jetbrains.kotlinx.spark + kotlin-spark-api-parent_2.12 + 1.0.4-SNAPSHOT + ../../pom_2.12.xml + + + + + org.scala-lang + scala-library + ${scala.version} + + + org.jetbrains.kotlin + kotlin-reflect + + + + + + org.apache.spark + spark-sql_${scala.compat.version} + ${spark3.version} + provided + + + + + + src/main/scala + src/test/scala + target/${scala.compat.version} + + + net.alchim31.maven + scala-maven-plugin + ${scala-maven-plugin.version} + + + compile + + compile + testCompile + + + + -dependencyfile + ${project.build.directory}/.scala_dependencies + + + + + docjar + + doc-jar + + pre-integration-test + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + - - - org.scala-lang - scala-library - ${scala.version} - - - org.jetbrains.kotlin - kotlin-reflect - - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark3.version} - provided - - - - src/main/scala - src/test/scala - target/${scala.compat.version} - - - net.alchim31.maven - scala-maven-plugin - ${scala-maven-plugin.version} - - - compile - - compile - testCompile - - - - -dependencyfile - ${project.build.directory}/.scala_dependencies - - - - - docjar - - doc-jar - - pre-integration-test - - - - - org.apache.maven.plugins - maven-site-plugin - - true - - - - diff --git a/examples/pom-3.2_2.12.xml b/examples/pom-3.2_2.12.xml index c3b95b0e..d069c058 100644 --- a/examples/pom-3.2_2.12.xml +++ b/examples/pom-3.2_2.12.xml @@ -1,108 +1,110 @@ - + - 4.0.0 + 4.0.0 - Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) - Example of usage - examples-3.2_2.12 - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.12 - 1.0.4-SNAPSHOT - ../pom_2.12.xml - + Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) + Example of usage + examples-3.2_2.12 + + org.jetbrains.kotlinx.spark + kotlin-spark-api-parent_2.12 + 1.0.4-SNAPSHOT + ../pom_2.12.xml + - - - org.jetbrains.kotlinx.spark - kotlin-spark-api-3.2 - ${project.version} - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark3.version} - - - org.apache.spark - spark-streaming_${scala.compat.version} - ${spark3.version} - - - org.apache.spark - spark-streaming-kafka-0-10_${scala.compat.version} - ${spark3.version} - - + + + org.jetbrains.kotlinx.spark + kotlin-spark-api-3.2 + ${project.version} + + + org.apache.spark + spark-sql_${scala.compat.version} + ${spark3.version} + + + org.apache.spark + spark-streaming_${scala.compat.version} + ${spark3.version} + + + org.apache.spark + spark-streaming-kafka-0-10_${scala.compat.version} + ${spark3.version} + + - - src/main/kotlin - src/test/kotlin - target/3.2/${scala.compat.version} - - - org.jetbrains.kotlin - kotlin-maven-plugin - - - compile - - compile - - - - test-compile - - test-compile - - - - - - org.apache.maven.plugins - maven-assembly-plugin - ${maven-assembly-plugin.version} - - - jar-with-dependencies - - - - org.jetbrains.spark.api.examples.WordCountKt - - - - - - 
org.apache.maven.plugins - maven-site-plugin - - true - - - - org.apache.maven.plugins - maven-deploy-plugin - - true - - - - org.sonatype.plugins - nexus-staging-maven-plugin - - true - - - - org.apache.maven.plugins - maven-compiler-plugin - - 8 - 8 - - - - + + src/main/kotlin + src/test/kotlin + target/3.2/${scala.compat.version} + + + org.jetbrains.kotlin + kotlin-maven-plugin + + + compile + + compile + + + + test-compile + + test-compile + + + + + + org.apache.maven.plugins + maven-assembly-plugin + ${maven-assembly-plugin.version} + + + jar-with-dependencies + + + + org.jetbrains.spark.api.examples.WordCountKt + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + org.sonatype.plugins + nexus-staging-maven-plugin + + true + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + 9 + 9 + + + + diff --git a/jupyter/pom.xml b/jupyter/pom.xml new file mode 100644 index 00000000..bfc67e6d --- /dev/null +++ b/jupyter/pom.xml @@ -0,0 +1,166 @@ + + + + 4.0.0 + + Kotlin Spark API: Jupyter integration for Spark 3.2+ (Scala 2.12) + kotlin-spark-api-jupyter-3.2 + Jupyter integration + + org.jetbrains.kotlinx.spark + kotlin-spark-api-parent_2.12 + 1.0.4-SNAPSHOT + ../pom_2.12.xml + + jar + + + + 11 + 11 + + + + + kotlinx-html + kotlinx-html + https://maven.pkg.jetbrains.space/public/p/kotlinx-html/maven + + + kotlin + kotlin + https://maven.pkg.jetbrains.space/kotlin/p/kotlin/dev + + + + + + org.jetbrains.kotlinx.spark + kotlin-spark-api-3.2 + ${project.version} + + + org.jetbrains.kotlinx + kotlinx-html-jvm + ${kotlinx.html.version} + + + org.apache.spark + spark-sql_${scala.compat.version} + ${spark3.version} + + + org.apache.spark + spark-repl_${scala.compat.version} + ${spark3.version} + + + org.apache.spark + spark-streaming_${scala.compat.version} + ${spark3.version} + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.jetbrains.kotlinx + kotlin-jupyter-api + ${kotlin-jupyter-api.version} + + + + + io.kotest + kotest-runner-junit5-jvm + ${kotest.version} + test + + + io.kotest.extensions + kotest-extensions-allure + ${kotest-extensions-allure.version} + test + + + org.jetbrains.kotlinx + kotlin-jupyter-test-kit + ${kotlin-jupyter-api.version} + test + + + + + src/main/kotlin + src/test/kotlin + target/${scala.compat.version} + + + org.jetbrains.kotlin + kotlin-maven-plugin + + + compile + + compile + + + + test-compile + + test-compile + + + + + + org.apache.maven.plugins + maven-assembly-plugin + ${maven-assembly-plugin.version} + + + jar-with-dependencies + + + + org.jetbrains.spark.api.examples.WordCountKt + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + org.apache.maven.plugins + maven-deploy-plugin + + false + + + + org.sonatype.plugins + nexus-staging-maven-plugin + + false + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + 9 + 9 + + + + + \ No newline at end of file diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/HtmlRendering.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/HtmlRendering.kt new file mode 100644 index 00000000..ad083962 --- /dev/null +++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/HtmlRendering.kt @@ -0,0 +1,136 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * =LICENSEEND= + */ +package org.jetbrains.kotlinx.spark.api.jupyter + +import kotlinx.html.* +import kotlinx.html.stream.appendHTML +import org.apache.spark.SparkException +import org.apache.spark.api.java.JavaRDDLike +import org.apache.spark.sql.Dataset +import org.apache.spark.unsafe.array.ByteArrayMethods +import org.jetbrains.kotlinx.spark.api.asKotlinIterable +import org.jetbrains.kotlinx.spark.api.asKotlinIterator +import org.jetbrains.kotlinx.spark.api.asKotlinList +import scala.Product +import java.io.InputStreamReader +import java.io.Serializable + +private fun createHtmlTable(fillTable: TABLE.() -> Unit): String = buildString { + appendHTML().head { + style("text/css") { + unsafe { + val resource = "/table.css" + val res = SparkIntegration::class.java + .getResourceAsStream(resource) ?: error("Resource '$resource' not found") + val readRes = InputStreamReader(res).readText() + raw("\n" + readRes) + } + } + } + + appendHTML().table("dataset", fillTable) +} + + +internal fun JavaRDDLike.toHtml(limit: Int = 20, truncate: Int = 30): String = try { + createHtmlTable { + val numRows = limit.coerceIn(0 until ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) + val tmpRows = take(numRows).toList() + + val hasMoreData = tmpRows.size - 1 > numRows + val rows = tmpRows.take(numRows) + + tr { th { +"Values" } } + + for (row in rows) tr { + td { + val string = when (row) { + is ByteArray -> row.joinToString(prefix = "[", postfix = "]") { "%02X".format(it) } + + is CharArray -> row.iterator().asSequence().toList().toString() + is ShortArray -> row.iterator().asSequence().toList().toString() + is IntArray -> row.iterator().asSequence().toList().toString() + is LongArray -> row.iterator().asSequence().toList().toString() + is FloatArray -> row.iterator().asSequence().toList().toString() + is DoubleArray -> row.iterator().asSequence().toList().toString() + is BooleanArray -> row.iterator().asSequence().toList().toString() + is Array<*> -> row.iterator().asSequence().toList().toString() + is Iterable<*> -> row.iterator().asSequence().toList().toString() + is scala.collection.Iterable<*> -> row.asKotlinIterable().iterator().asSequence().toList().toString() + is Iterator<*> -> row.asSequence().toList().toString() + is scala.collection.Iterator<*> -> row.asKotlinIterator().asSequence().toList().toString() + is Product -> row.productIterator().asKotlinIterator().asSequence().toList().toString() + is Serializable -> row.toString() + // maybe others? + + is Any? -> row.toString() + else -> row.toString() + } + + +if (truncate > 0 && string.length > truncate) { + // do not show ellipses for strings shorter than 4 characters. + if (truncate < 4) string.substring(0, truncate) + else string.substring(0, truncate - 3) + "..." + } else { + string + } + } + } + + if (hasMoreData) tr { + +"only showing top $numRows ${if (numRows == 1) "row" else "rows"}" + } + } +} catch (e: SparkException) { + // Whenever toString() on the contents doesn't work, since the class might be unknown... 
+ """${toString()} + |Cannot render this RDD of this class.""".trimMargin() +} + +internal fun Dataset.toHtml(limit: Int = 20, truncate: Int = 30): String = createHtmlTable { + val numRows = limit.coerceIn(0 until ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) + val tmpRows = getRows(numRows, truncate).asKotlinList().map { it.asKotlinList() } + + val hasMoreData = tmpRows.size - 1 > numRows + val rows = tmpRows.take(numRows + 1) + + tr { + for (header in rows.first()) th { + +if (truncate > 0 && header.length > truncate) { + // do not show ellipses for strings shorter than 4 characters. + if (truncate < 4) header.substring(0, truncate) + else header.substring(0, truncate - 3) + "..." + } else { + header + } + + } + } + + for (row in rows.drop(1)) tr { + for (item in row) td { + +item + } + } + + if (hasMoreData) tr { + +"only showing top $numRows ${if (numRows == 1) "row" else "rows"}" + } +} diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt new file mode 100644 index 00000000..0b2a8306 --- /dev/null +++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt @@ -0,0 +1,93 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * =LICENSEEND=
+ */
+package org.jetbrains.kotlinx.spark.api.jupyter
+
+import org.apache.spark.api.java.JavaRDDLike
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.Dataset
+import org.jetbrains.kotlinx.jupyter.api.HTML
+import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
+import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
+
+abstract class Integration : JupyterIntegration() {
+
+    private val kotlinVersion = "1.6.21"
+    private val scalaCompatVersion = "2.12"
+    private val scalaVersion = "2.12.15"
+    private val spark3Version = "3.2.1"
+
+    abstract fun KotlinKernelHost.onLoaded()
+
+    override fun Builder.onLoaded() {
+
+        dependencies(
+            "org.apache.spark:spark-repl_$scalaCompatVersion:$spark3Version",
+            "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlinVersion",
+            "org.jetbrains.kotlin:kotlin-reflect:$kotlinVersion",
+            "org.apache.spark:spark-sql_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-streaming_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-mllib_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-sql_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-graphx_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-launcher_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-catalyst_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-streaming_$scalaCompatVersion:$spark3Version",
+            "org.apache.spark:spark-core_$scalaCompatVersion:$spark3Version",
+            "org.scala-lang:scala-library:$scalaVersion",
+            "org.scala-lang.modules:scala-xml_$scalaCompatVersion:2.0.1",
+            "org.scala-lang:scala-reflect:$scalaVersion",
+            "org.scala-lang:scala-compiler:$scalaVersion",
+            "commons-io:commons-io:2.11.0",
+        )
+
+        import(
+            "org.jetbrains.kotlinx.spark.api.*",
+            "org.jetbrains.kotlinx.spark.api.tuples.*",
+            *(1..22).map { "scala.Tuple$it" }.toTypedArray(),
+            "org.apache.spark.sql.functions.*",
+            "org.apache.spark.*",
+            "org.apache.spark.sql.*",
+            "org.apache.spark.api.java.*",
+            "scala.collection.Seq",
+            "org.apache.spark.rdd.*",
+            "java.io.Serializable",
+            "org.apache.spark.streaming.api.java.*",
+            "org.apache.spark.streaming.api.*",
+            "org.apache.spark.streaming.*",
+        )
+
+        onLoaded {
+            onLoaded()
+        }
+
+        // Render Dataset
+        render<Dataset<*>> {
+            HTML(it.toHtml())
+        }
+
+        render<RDD<*>> {
+            HTML(it.toJavaRDD().toHtml())
+        }
+
+        render<JavaRDDLike<*, *>> {
+            HTML(it.toHtml())
+        }
+    }
+}
diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt
new file mode 100644
index 00000000..fe758700
--- /dev/null
+++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt
@@ -0,0 +1,71 @@
+/*-
+ * =LICENSE=
+ * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12)
+ * ----------
+ * Copyright (C) 2019 - 2022 JetBrains
+ * ----------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =LICENSEEND=
+ */
+package org.jetbrains.kotlinx.spark.api.jupyter
+
+
+import org.intellij.lang.annotations.Language
+import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
+
+/**
+ * %use kotlin-spark-api
+ */
+@Suppress("UNUSED_VARIABLE", "LocalVariableName")
+@OptIn(ExperimentalStdlibApi::class)
+internal class SparkIntegration : Integration() {
+
+    override fun KotlinKernelHost.onLoaded() {
+        val _0 = execute("""%dumpClassesForSpark""")
+
+        @Language("kts")
+        val _1 = listOf(
+            """
+                val spark = org.jetbrains.kotlinx.spark.api.SparkSession
+                    .builder()
+                    .master(SparkConf().get("spark.master", "local[*]"))
+                    .appName("Jupyter")
+                    .config("spark.sql.codegen.wholeStage", false)
+                    .getOrCreate()""".trimIndent(),
+            """
+                spark.sparkContext.setLogLevel(org.jetbrains.kotlinx.spark.api.SparkLogLevel.ERROR)""".trimIndent(),
+            """
+                val sc by lazy {
+                    org.apache.spark.api.java.JavaSparkContext(spark.sparkContext)
+                }""".trimIndent(),
+            """
+                println("Spark session has been started and is running. No `withSpark { }` necessary, you can access `spark` and `sc` directly. To use Spark streaming, use `%use kotlin-spark-api-streaming` instead.")""".trimIndent(),
+            """
+                inline fun <reified T> List<T>.toDS(): Dataset<T> = toDS(spark)""".trimIndent(),
+            """
+                inline fun <reified T> Array<T>.toDS(): Dataset<T> = spark.dsOf(*this)""".trimIndent(),
+            """
+                inline fun <reified T> dsOf(vararg arg: T): Dataset<T> = spark.dsOf(*arg)""".trimIndent(),
+            """
+                inline fun <reified T> RDD<T>.toDS(): Dataset<T> = toDS(spark)""".trimIndent(),
+            """
+                inline fun <reified T> JavaRDDLike<T, *>.toDS(): Dataset<T> = toDS(spark)""".trimIndent(),
+            """
+                inline fun <reified T> RDD<T>.toDF(): Dataset<Row> = toDF(spark)""".trimIndent(),
+            """
+                inline fun <reified T> JavaRDDLike<T, *>.toDF(): Dataset<Row> = toDF(spark)""".trimIndent(),
+            """
+                val udf: UDFRegistration get() = spark.udf()""".trimIndent(),
+        ).map(::execute)
+    }
+}
diff --git a/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkStreamingIntegration.kt b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkStreamingIntegration.kt
new file mode 100644
index 00000000..a0834cc7
--- /dev/null
+++ b/jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkStreamingIntegration.kt
@@ -0,0 +1,60 @@
+/*-
+ * =LICENSE=
+ * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12)
+ * ----------
+ * Copyright (C) 2019 - 2022 JetBrains
+ * ----------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =LICENSEEND= + */ +package org.jetbrains.kotlinx.spark.api.jupyter + +import kotlinx.html.* +import kotlinx.html.stream.appendHTML +import org.apache.spark.api.java.JavaRDDLike +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Dataset +import org.apache.spark.unsafe.array.ByteArrayMethods +import org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.jupyter.api.HTML +import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration +import org.jetbrains.kotlinx.spark.api.* +import java.io.InputStreamReader + + +import org.apache.spark.* +import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost +import scala.collection.* +import org.jetbrains.kotlinx.spark.api.SparkSession +import scala.Product +import java.io.Serializable +import scala.collection.Iterable as ScalaIterable +import scala.collection.Iterator as ScalaIterator + +/** + * %use kotlin-spark-api-streaming + */ +@Suppress("UNUSED_VARIABLE", "LocalVariableName") +@OptIn(ExperimentalStdlibApi::class) +internal class SparkStreamingIntegration : Integration() { + + override fun KotlinKernelHost.onLoaded() { + val _0 = execute("""%dumpClassesForSpark""") + + @Language("kts") + val _1 = listOf( + """ + println("To start a spark streaming session, simply use `withSparkStreaming { }` inside a cell. To use Spark normally, use `withSpark { }` in a cell, or use `%use kotlin-spark-api` to start a Spark session for the whole notebook.")""".trimIndent(), + ).map(::execute) + } +} diff --git a/jupyter/src/main/resources/META-INF/kotlin-jupyter-libraries/libraries.json b/jupyter/src/main/resources/META-INF/kotlin-jupyter-libraries/libraries.json new file mode 100644 index 00000000..82c7354e --- /dev/null +++ b/jupyter/src/main/resources/META-INF/kotlin-jupyter-libraries/libraries.json @@ -0,0 +1,11 @@ +{ + "definitions": [], + "producers": [ + { + "fqn": "org.jetbrains.kotlinx.spark.api.jupyter.SparkIntegration" + }, + { + "fqn": "org.jetbrains.kotlinx.spark.api.jupyter.SparkStreamingIntegration" + } + ] +} diff --git a/jupyter/src/main/resources/table.css b/jupyter/src/main/resources/table.css new file mode 100644 index 00000000..f656add9 --- /dev/null +++ b/jupyter/src/main/resources/table.css @@ -0,0 +1,146 @@ +:root { + --background: #fff; + --background-odd: #f5f5f5; + --background-hover: #d9edfd; + --header-text-color: #474747; + --text-color: #848484; + --text-color-dark: #000; + --text-color-medium: #737373; + --text-color-pale: #b3b3b3; + --inner-border-color: #aaa; + --bold-border-color: #000; + --link-color: #296eaa; + --link-color-pale: #296eaa; + --link-hover: #1a466c; +} + +:root[theme="dark"], :root [data-jp-theme-light="false"]{ + --background: #303030; + --background-odd: #3c3c3c; + --background-hover: #464646; + --header-text-color: #dddddd; + --text-color: #b3b3b3; + --text-color-dark: #dddddd; + --text-color-medium: #b2b2b2; + --text-color-pale: #737373; + --inner-border-color: #707070; + --bold-border-color: #777777; + --link-color: #008dc0; + --link-color-pale: #97e1fb; + --link-hover: #00688e; +} + +table.dataset { + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-size: 12px; + background-color: var(--background); + color: var(--text-color); + border: none; + border-collapse: collapse; +} + +table.dataset th, td { + padding: 6px; + border: 1px solid transparent; + text-align: left; +} + +table.dataset th { + background-color: var(--background); + color: var(--header-text-color); +} + +table.dataset td { + vertical-align: top; +} + +table.dataset th.bottomBorder { + 
border-bottom-color: var(--bold-border-color); +} + +table.dataset tbody > tr:nth-child(odd) { + background: var(--background-odd); +} + +table.dataset tbody > tr:nth-child(even) { + background: var(--background); +} + +table.dataset tbody > tr:hover { + background: var(--background-hover); +} + +table.dataset a { + cursor: pointer; + color: var(--link-color); + text-decoration: none; +} + +table.dataset tr:hover > td a { + color: var(--link-color-pale); +} + +table.dataset a:hover { + color: var(--link-hover); + text-decoration: underline; +} + +table.dataset img { + max-width: fit-content; +} + +table.dataset th.complex { + background-color: var(--background); + border: 1px solid var(--background); +} + +table.dataset .leftBorder { + border-left-color: var(--inner-border-color); +} + +table.dataset .rightBorder { + border-right-color: var(--inner-border-color); +} + +table.dataset .rightAlign { + text-align: right; +} + +table.dataset .expanderSvg { + width: 8px; + height: 8px; + margin-right: 3px; +} + +table.dataset .expander { + display: flex; + align-items: center; +} + +/* formatting */ + +table.dataset .null { + color: var(--text-color-pale); +} + +table.dataset .structural { + color: var(--text-color-medium); + font-weight: bold; +} + +table.dataset .datasetCaption { + font-weight: bold; +} + +table.dataset .numbers { + color: var(--text-color-dark); +} + +table.dataset td:hover .formatted .structural, .null { + color: var(--text-color-dark); +} + +table.dataset tr:hover .formatted .structural, .null { + color: var(--text-color-dark); +} + diff --git a/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt b/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt new file mode 100644 index 00000000..2f35bee4 --- /dev/null +++ b/jupyter/src/test/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/JupyterTests.kt @@ -0,0 +1,332 @@ +/*- + * =LICENSE= + * Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + * ---------- + * Copyright (C) 2019 - 2022 JetBrains + * ---------- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * =LICENSEEND= + */ +package org.jetbrains.kotlinx.spark.api.jupyter + +import io.kotest.assertions.throwables.shouldThrowAny +import io.kotest.core.spec.style.ShouldSpec +import io.kotest.matchers.collections.shouldBeIn +import io.kotest.matchers.nulls.shouldNotBeNull +import io.kotest.matchers.shouldBe +import io.kotest.matchers.shouldNotBe +import io.kotest.matchers.string.shouldContain +import io.kotest.matchers.types.shouldBeInstanceOf +import jupyter.kotlin.DependsOn +import org.apache.spark.api.java.JavaSparkContext +import org.apache.spark.streaming.Duration +import org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.jupyter.EvalRequestData +import org.jetbrains.kotlinx.jupyter.ReplForJupyter +import org.jetbrains.kotlinx.jupyter.ReplForJupyterImpl +import org.jetbrains.kotlinx.jupyter.api.Code +import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult +import org.jetbrains.kotlinx.jupyter.libraries.EmptyResolutionInfoProvider +import org.jetbrains.kotlinx.jupyter.repl.EvalResultEx +import org.jetbrains.kotlinx.jupyter.testkit.ReplProvider +import org.jetbrains.kotlinx.jupyter.util.PatternNameAcceptanceRule +import org.jetbrains.kotlinx.spark.api.tuples.* +import org.jetbrains.kotlinx.spark.api.* +import scala.Tuple2 +import java.io.Serializable +import java.util.* +import kotlin.script.experimental.jvm.util.classpathFromClassloader + +class JupyterTests : ShouldSpec({ + val replProvider = ReplProvider { classpath -> + ReplForJupyterImpl( + resolutionInfoProvider = EmptyResolutionInfoProvider, + scriptClasspath = classpath, + isEmbedded = true, + ).apply { + eval { + librariesScanner.addLibrariesFromClassLoader( + classLoader = currentClassLoader, + host = this, + integrationTypeNameRules = listOf( + PatternNameAcceptanceRule(false, "org.jetbrains.kotlinx.spark.api.jupyter.**"), + PatternNameAcceptanceRule(true, "org.jetbrains.kotlinx.spark.api.jupyter.SparkIntegration"), + ), + ) + } + } + } + + val currentClassLoader = DependsOn::class.java.classLoader + val scriptClasspath = classpathFromClassloader(currentClassLoader).orEmpty() + + fun createRepl(): ReplForJupyter = replProvider(scriptClasspath) + suspend fun withRepl(action: suspend ReplForJupyter.() -> Unit): Unit = createRepl().action() + + context("Jupyter") { + withRepl { + + should("Allow functions on local data classes") { + @Language("kts") + val klass = exec("""data class Test(val a: Int, val b: String)""") + + @Language("kts") + val ds = exec("""val ds = dsOf(Test(1, "hi"), Test(2, "something"))""") + + @Language("kts") + val filtered = exec("""val filtered = ds.filter { it.a > 1 }""") + + @Language("kts") + val filteredShow = exec("""filtered.show()""") + } + + should("Have spark instance") { + @Language("kts") + val spark = exec("""spark""") + spark as? SparkSession shouldNotBe null + } + + should("Have JavaSparkContext instance") { + @Language("kts") + val sc = exec("""sc""") + sc as? 
JavaSparkContext shouldNotBe null + } + + should("render Datasets") { + @Language("kts") + val html = execHtml( + """ + val ds = listOf(1, 2, 3).toDS() + ds + """.trimIndent() + ) + println(html) + + html shouldContain "value" + html shouldContain "1" + html shouldContain "2" + html shouldContain "3" + } + + should("render JavaRDDs") { + @Language("kts") + val html = execHtml( + """ + val rdd: JavaRDD> = sc.parallelize(listOf( + listOf(1, 2, 3), + listOf(4, 5, 6), + )) + rdd + """.trimIndent() + ) + println(html) + + html shouldContain "1, 2, 3" + html shouldContain "4, 5, 6" + } + + should("render JavaRDDs with Arrays") { + @Language("kts") + val html = execHtml( + """ + val rdd: JavaRDD = sc.parallelize(listOf( + intArrayOf(1, 2, 3), + intArrayOf(4, 5, 6), + )) + rdd + """.trimIndent() + ) + println(html) + + html shouldContain "1, 2, 3" + html shouldContain "4, 5, 6" + } + + should("render JavaRDDs with custom class") { + + @Language("kts") + val klass = exec(""" + data class Test( + val longFirstName: String, + val second: LongArray, + val somethingSpecial: Map, + ): Serializable + """.trimIndent()) + + @Language("kts") + val html = execHtml(""" + val rdd = sc.parallelize( + listOf( + Test("aaaaaaaaa", longArrayOf(1L, 100000L, 24L), mapOf(1 to "one", 2 to "two")), + Test("aaaaaaaaa", longArrayOf(1L, 100000L, 24L), mapOf(1 to "one", 2 to "two")), + ) + ) + rdd + """.trimIndent() + ) + html shouldContain "Test(longFirstName=aaaaaaaa..." + } + + should("render JavaPairRDDs") { + @Language("kts") + val html = execHtml( + """ + val rdd: JavaPairRDD = sc.parallelizePairs(listOf( + c(1, 2).toTuple(), + c(3, 4).toTuple(), + )) + rdd + """.trimIndent() + ) + println(html) + + html shouldContain "1, 2" + html shouldContain "3, 4" + } + + should("render JavaDoubleRDD") { + @Language("kts") + val html = execHtml( + """ + val rdd: JavaDoubleRDD = sc.parallelizeDoubles(listOf(1.0, 2.0, 3.0, 4.0,)) + rdd + """.trimIndent() + ) + println(html) + + html shouldContain "1.0" + html shouldContain "2.0" + html shouldContain "3.0" + html shouldContain "4.0" + } + + should("render Scala RDD") { + @Language("kts") + val html = execHtml( + """ + val rdd: RDD> = sc.parallelize(listOf( + listOf(1, 2, 3), + listOf(4, 5, 6), + )).rdd() + rdd + """.trimIndent() + ) + println(html) + + html shouldContain "1, 2, 3" + html shouldContain "4, 5, 6" + } + + @Language("kts") + val _stop = exec("""spark.stop()""") + } + } +}) + +class JupyterStreamingTests : ShouldSpec({ + val replProvider = ReplProvider { classpath -> + ReplForJupyterImpl( + resolutionInfoProvider = EmptyResolutionInfoProvider, + scriptClasspath = classpath, + isEmbedded = true, + ).apply { + eval { + librariesScanner.addLibrariesFromClassLoader( + classLoader = currentClassLoader, + host = this, + integrationTypeNameRules = listOf( + PatternNameAcceptanceRule(false, "org.jetbrains.kotlinx.spark.api.jupyter.**"), + PatternNameAcceptanceRule(true, + "org.jetbrains.kotlinx.spark.api.jupyter.SparkStreamingIntegration"), + ), + ) + } + } + } + + val currentClassLoader = DependsOn::class.java.classLoader + val scriptClasspath = classpathFromClassloader(currentClassLoader).orEmpty() + + fun createRepl(): ReplForJupyter = replProvider(scriptClasspath) + suspend fun withRepl(action: suspend ReplForJupyter.() -> Unit): Unit = createRepl().action() + + context("Jupyter") { + withRepl { + + should("Not have spark instance") { + shouldThrowAny { + @Language("kts") + val spark = exec("""spark""") + Unit + } + } + + should("Not have sc instance") { + shouldThrowAny { + 
@Language("kts") + val sc = exec("""sc""") + Unit + } + } + + should("stream") { + val input = listOf("aaa", "bbb", "aaa", "ccc") + val counter = Counter(0) + + withSparkStreaming(Duration(10), timeout = 1000) { + + val (counterBroadcast, queue) = withSpark(ssc) { + spark.broadcast(counter) X LinkedList(listOf(sc.parallelize(input))) + } + + val inputStream = ssc.queueStream(queue) + + inputStream.foreachRDD { rdd, _ -> + withSpark(rdd) { + rdd.toDS().forEach { + it shouldBeIn input + counterBroadcast.value.value++ + } + } + } + } + + counter.value shouldBe input.size + } + + } + } +}) + + +private fun ReplForJupyter.execEx(code: Code): EvalResultEx = evalEx(EvalRequestData(code)) + +private fun ReplForJupyter.exec(code: Code): Any? = execEx(code).renderedValue + +private fun ReplForJupyter.execRaw(code: Code): Any? = execEx(code).rawValue + +@JvmName("execTyped") +private inline fun ReplForJupyter.exec(code: Code): T { + val res = exec(code) + res.shouldBeInstanceOf() + return res +} + +private fun ReplForJupyter.execHtml(code: Code): String { + val res = exec(code) + val html = res["text/html"] + html.shouldNotBeNull() + return html +} + +class Counter(@Volatile var value: Int) : Serializable diff --git a/kotlin-spark-api/3.2/pom_2.12.xml b/kotlin-spark-api/3.2/pom_2.12.xml index 99172895..32c5f0f9 100644 --- a/kotlin-spark-api/3.2/pom_2.12.xml +++ b/kotlin-spark-api/3.2/pom_2.12.xml @@ -1,191 +1,188 @@ - + - 4.0.0 + 4.0.0 - Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) - kotlin-spark-api-3.2 - Kotlin API compatible with spark 3.2.0 Kotlin for Apache Spark - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.12 - 1.0.4-SNAPSHOT - ../../pom_2.12.xml - - jar + Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + kotlin-spark-api-3.2 + Kotlin API compatible with spark 3.2.0 Kotlin for Apache Spark + + org.jetbrains.kotlinx.spark + kotlin-spark-api-parent_2.12 + 1.0.4-SNAPSHOT + ../../pom_2.12.xml + + jar - - - org.jetbrains.kotlin - kotlin-stdlib-jdk8 - - - org.jetbrains.kotlin - kotlin-reflect - - - org.jetbrains.kotlinx.spark - core-3.2_${scala.compat.version} - - - org.jetbrains.kotlinx.spark - scala-tuples-in-kotlin - - - org.apache.spark - spark-streaming-kafka-0-10_${scala.compat.version} - ${spark3.version} - + + + org.jetbrains.kotlin + kotlin-stdlib-jdk8 + + + org.jetbrains.kotlin + kotlin-reflect + + + org.jetbrains.kotlinx.spark + core-3.2_${scala.compat.version} + + + org.jetbrains.kotlinx.spark + scala-tuples-in-kotlin + - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark3.version} - provided - - - org.apache.spark - spark-streaming_${scala.compat.version} - ${spark3.version} - provided - - - org.apache.hadoop - hadoop-client - ${hadoop.version} - provided - + + + org.apache.spark + spark-sql_${scala.compat.version} + ${spark3.version} + provided + + + org.apache.spark + spark-streaming_${scala.compat.version} + ${spark3.version} + provided + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + provided + - - - io.kotest - kotest-runner-junit5-jvm - ${kotest.version} - test - - - io.kotest.extensions - kotest-extensions-allure - ${kotest-extensions-allure.version} - test - - - io.github.embeddedkafka - embedded-kafka_${scala.compat.version} - ${embedded-kafka.version} - test - - - io.kotest.extensions - kotest-extensions-testcontainers - ${kotest-extensions-testcontainers.version} - test - - - com.beust - klaxon - ${klaxon.version} - test - - - ch.tutteli.atrium - atrium-fluent-en_GB - ${atrium.version} - test - - - org.apache.spark - 
spark-streaming_${scala.compat.version} - ${spark3.version} - tests - test - - - org.apache.kafka - kafka-streams-test-utils - 3.1.0 - test - - + + + org.apache.spark + spark-streaming-kafka-0-10_${scala.compat.version} + ${spark3.version} + test + + + io.kotest + kotest-runner-junit5-jvm + ${kotest.version} + test + + + io.kotest.extensions + kotest-extensions-allure + ${kotest-extensions-allure.version} + test + + + io.github.embeddedkafka + embedded-kafka_${scala.compat.version} + ${embedded-kafka.version} + test + + + com.beust + klaxon + ${klaxon.version} + test + + + ch.tutteli.atrium + atrium-fluent-en_GB + ${atrium.version} + test + + + org.apache.spark + spark-streaming_${scala.compat.version} + ${spark3.version} + tests + test + + + org.apache.kafka + kafka-streams-test-utils + 3.1.0 + test + + - - src/main/kotlin - src/test/kotlin - target/${scala.compat.version} - + + src/main/kotlin + src/test/kotlin + target/${scala.compat.version} + - - org.jetbrains.kotlin - kotlin-maven-plugin - - - compile - - compile - - - - test-compile - - test-compile - - - - + + org.jetbrains.kotlin + kotlin-maven-plugin + + + compile + + compile + + + + test-compile + + test-compile + + + + - - org.apache.maven.plugins - maven-surefire-plugin - + + org.apache.maven.plugins + maven-surefire-plugin + - - org.jetbrains.dokka - dokka-maven-plugin - ${dokka.version} - - 8 - - - - dokka - - dokka - - pre-site - - - javadocjar - - javadocJar - - pre-integration-test - - - + + org.jetbrains.dokka + dokka-maven-plugin + ${dokka.version} + + 8 + + + + dokka + + dokka + + pre-site + + + javadocjar + + javadocJar + + pre-integration-test + + + - - io.qameta.allure - allure-maven - - ${project.basedir}/allure-results/${scala.compat.version} - - + + io.qameta.allure + allure-maven + + ${project.basedir}/allure-results/${scala.compat.version} + + - - org.jacoco - jacoco-maven-plugin - - - org.apache.maven.plugins - maven-compiler-plugin - - 8 - 8 - - + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + 8 + 8 + + - - + + diff --git a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt index ffa42ada..72e8a9b7 100644 --- a/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt +++ b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Column.kt @@ -68,7 +68,7 @@ inline operator fun Dataset.invoke(column: KProperty1< replaceWith = ReplaceWith("this `===` c"), level = DeprecationLevel.ERROR, ) -infix fun Column.`==`(c: Column) = `$eq$eq$eq`(c) +infix fun Column.`==`(c: Column): Column = `$eq$eq$eq`(c) /** * Unary minus, i.e. negate the expression. 
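The `Column.kt` hunk above only adds an explicit `Column` return type to the deprecated `` `==` `` bridge, which forwards to Scala's `$eq$eq$eq`. As a minimal sketch of what the migration to the `` `===` `` infix (the replacement named in the deprecation's `ReplaceWith`) looks like in user code — the `Person` class and the literal comparison are hypothetical, not part of this diff:

```kotlin
import org.apache.spark.sql.functions.lit
import org.jetbrains.kotlinx.spark.api.*

data class Person(val name: String, val age: Int)

fun main() = withSpark {
    val ds = dsOf(Person("Jane", 30), Person("John", 17))

    // Deprecated (now a compile-time error): ds.col("age") `==` lit(30)
    // The Scala-style triple-equals builds the same equality Column:
    ds.filter(ds.col("age") `===` lit(30)).show()
}
```

The explicit return type matters for such infix bridges: without it, the inferred type leaks the Scala synthetic name into the public API signature.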
diff --git a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt index 29a073ad..afb87435 100644 --- a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt +++ b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/EncodingTest.kt @@ -51,7 +51,11 @@ class EncodingTest : ShouldSpec({ should("handle Instant Datasets") { val instants = listOf(Instant.now(), Instant.now()) val dataset: Dataset = instants.toDS() - dataset.collectAsList() shouldBe instants + dataset.collectAsList().let { (first, second) -> + val (a, b) = instants + a.compareTo(first) shouldBe 0 + b.compareTo(second) shouldBe 0 + } } should("handle Timestamp Datasets") { @@ -107,7 +111,11 @@ class EncodingTest : ShouldSpec({ should("be able to serialize Instant") { val instantPair = Instant.now() to Instant.now() val dataset = dsOf(instantPair) - dataset.collectAsList() shouldBe listOf(instantPair) + dataset.collectAsList().single().let { (first, second) -> + val (a, b) = instantPair + a.compareTo(first) shouldBe 0 + b.compareTo(second) shouldBe 0 + } } should("be able to serialize Date") { diff --git a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/StreamingTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/StreamingTest.kt index 8ae7f5c2..9719e8fc 100644 --- a/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/StreamingTest.kt +++ b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/StreamingTest.kt @@ -49,7 +49,6 @@ class StreamingTest : ShouldSpec({ context("streaming") { should("stream") { - val input = listOf("aaa", "bbb", "aaa", "ccc") val counter = Counter(0) @@ -72,7 +71,6 @@ class StreamingTest : ShouldSpec({ } counter.value shouldBe input.size - } should("Work with checkpointpath") { diff --git a/pom.xml b/pom.xml index 8c99ec41..c224b020 100644 --- a/pom.xml +++ b/pom.xml @@ -10,14 +10,15 @@ pom - 1.6.20 + 1.6.21 1.6.10 0.17.0 5.2.3 1.1.0 - 1.3.1 3.1.0 3.2.1 + 0.11.0-83 + 0.7.5 3.3.1 @@ -32,7 +33,8 @@ 3.2.0 3.9.1 3.2.1 - 3.0.0-M5 + 3.10.1 + 3.0.0-M6 1.6.8 4.5.6 official @@ -178,6 +180,12 @@ update-file-header + + + **/*.json + **/*.css + + process-sources @@ -227,19 +235,7 @@ true false forked-path - scala-2.12,release-sign - - - - org.sonatype.plugins - nexus-staging-maven-plugin - ${nexus-staging-plugin.version} - true - - ossrh - https://oss.sonatype.org/ - false - 20 + scala-2.12,central-deploy @@ -285,8 +281,11 @@ https://oss.sonatype.org/service/local/staging/deploy/maven2/ - ossrh - https://oss.sonatype.org/content/repositories/snapshots + github + GitHub JetBrains Apache Maven Packages + https://maven.pkg.github.com/JetBrains/kotlin-spark-api + + @@ -301,7 +300,7 @@ - release-sign + central-deploy performRelease @@ -310,6 +309,18 @@ + + org.sonatype.plugins + nexus-staging-maven-plugin + ${nexus-staging-plugin.version} + true + + ossrh + https://oss.sonatype.org/ + false + 20 + + org.apache.maven.plugins maven-gpg-plugin diff --git a/pom_2.12.xml b/pom_2.12.xml index 29b854c1..f59970af 100644 --- a/pom_2.12.xml +++ b/pom_2.12.xml @@ -14,6 +14,7 @@ pom + true 2.12.15 2.12 @@ -23,6 +24,7 @@ scala-tuples-in-kotlin/pom_2.12.xml kotlin-spark-api/3.2/pom_2.12.xml examples/pom-3.2_2.12.xml + jupyter diff --git a/scala-tuples-in-kotlin/pom_2.12.xml b/scala-tuples-in-kotlin/pom_2.12.xml index cf67af41..7e5f02fc 100644 --- a/scala-tuples-in-kotlin/pom_2.12.xml +++ 
b/scala-tuples-in-kotlin/pom_2.12.xml
@@ -142,7 +142,7 @@
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-deploy-plugin</artifactId>
                 <configuration>
-                    <skip>true</skip>
+                    <skip>false</skip>
                 </configuration>
@@ -150,7 +150,7 @@
                 <groupId>org.sonatype.plugins</groupId>
                 <artifactId>nexus-staging-maven-plugin</artifactId>
                 <configuration>
-                    <skip>true</skip>
+                    <skip>false</skip>
                 </configuration>
diff --git a/scala-tuples-in-kotlin/src/test/kotlin/org/jetbrains/kotlinx/spark/api/tuples/TuplesTest.kt b/scala-tuples-in-kotlin/src/test/kotlin/org/jetbrains/kotlinx/spark/api/tuples/TuplesTest.kt
index 75d6f49a..5cbf6c72 100644
--- a/scala-tuples-in-kotlin/src/test/kotlin/org/jetbrains/kotlinx/spark/api/tuples/TuplesTest.kt
+++ b/scala-tuples-in-kotlin/src/test/kotlin/org/jetbrains/kotlinx/spark/api/tuples/TuplesTest.kt
@@ -21,15 +21,14 @@ package org.jetbrains.kotlinx.spark.api.tuples
 
 import io.kotest.assertions.throwables.shouldThrow
 import io.kotest.core.spec.style.ShouldSpec
-import io.kotest.matchers.collections.shouldNotBeIn
 import io.kotest.matchers.shouldBe
 import io.kotest.matchers.shouldNotBe
 import org.jetbrains.kotlinx.spark.api.tuples.*
 import org.jetbrains.kotlinx.spark.api.*
 import scala.Tuple3
-import io.kotest.matchers.types.shouldBeInstanceOf
 import scala.Tuple1
 import scala.Tuple2
+import kotlin.reflect.typeOf
 
 @Suppress("ShouldBeInstanceOfInspection", "RedundantLambdaArrow", "USELESS_IS_CHECK")
 class TuplesTest : ShouldSpec({
@@ -159,7 +158,7 @@ class TuplesTest : ShouldSpec({
         tupleOf(1, 2, 3).toTriple() shouldBe Triple(1, 2, 3)
 
         tupleOf(1, 2, 3, 4, 5, 6, 7)[1..3].let {
-            it.shouldBeInstanceOf<List<Int>>()
+            (it is List<*>) shouldBe true
             it.containsAll(listOf(2, 3, 4)) shouldBe true
         }
         tupleOf(1, 1, 2)[1..2] shouldBe tupleOf(1, 2, 2)[0..1]
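As context for the `TuplesTest` change above: slicing a tuple with a range returns a plain Kotlin `List`, since the elements at different positions can have different types. That is why the test can assert with a simple `is List` check instead of `shouldBeInstanceOf`. A small self-contained sketch of the behaviour being tested, assuming only the `tupleOf` helper and the range-`get` operator from `scala-tuples-in-kotlin`:

```kotlin
import org.jetbrains.kotlinx.spark.api.tuples.*

fun main() {
    // A range slice of a tuple comes back as a List of the selected values,
    // using zero-based positions just like the tuple itself.
    val slice = tupleOf(1, 2, 3, 4, 5, 6, 7)[1..3]
    println(slice)                               // [2, 3, 4]
    println(slice.containsAll(listOf(2, 3, 4)))  // true
}
```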