@@ -7,9 +7,9 @@
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- *
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -20,8 +20,13 @@
 package org.jetbrains.kotlinx.spark.api
 
 import io.kotest.core.Tag
-import io.kotest.core.spec.style.ShouldSpec
-import io.kotest.matchers.collections.shouldBeIn
+import io.kotest.core.extensions.install
+import io.kotest.core.spec.style.FunSpec
+import io.kotest.extensions.testcontainers.TestContainerExtension
+import io.kotest.extensions.testcontainers.kafka.createStringStringConsumer
+import io.kotest.extensions.testcontainers.kafka.createStringStringProducer
+import io.kotest.matchers.collections.shouldContain
+import io.kotest.matchers.collections.shouldContainAll
 import org.apache.kafka.clients.consumer.ConsumerConfig
 import org.apache.kafka.clients.consumer.ConsumerRecord
 import org.apache.kafka.clients.producer.ProducerRecord
@@ -32,85 +37,94 @@ import org.apache.spark.streaming.kafka010.ConsumerStrategies
 import org.apache.spark.streaming.kafka010.KafkaUtils
 import org.apache.spark.streaming.kafka010.LocationStrategies
 import org.jetbrains.kotlinx.spark.api.tuples.*
+import org.testcontainers.containers.KafkaContainer
+import org.testcontainers.utility.DockerImageName
+import scala.Tuple3
 import java.io.Serializable
+import java.time.Duration
 
 object Kafka : Tag()
 
-class KafkaStreamingTest : ShouldSpec({
+class KafkaStreamingTest : FunSpec() {
+    init {
 
-    // making sure it can be skipped on Github actions since it times out
-    tags(Kafka)
+        tags(Kafka)
 
-    xcontext("kafka") {
-        val port = 9092
-        val broker = "localhost:$port"
-        val topic1 = "test1"
-        val topic2 = "test2"
-        val kafkaListener = EmbeddedKafkaListener(port)
-        listener(kafkaListener)
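+        // Start a single-node Kafka broker in a Docker container; the kotest
+        // Testcontainers extension manages the container's lifecycle for the spec.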
+        val kafka = install(
+            TestContainerExtension(KafkaContainer(DockerImageName.parse("confluentinc/cp-kafka:7.0.1")))
+        ) {
+            withEmbeddedZookeeper()
+            withEnv("KAFKA_AUTO_CREATE_TOPICS_ENABLE", "true")
+        }
+        println(kafka.bootstrapServers)
+        test("Streaming should support kafka") {
+            val topic1 = "test1"
+            val topic2 = "test2"
 
-        should("support kafka streams") {
-            val producer = kafkaListener.stringStringProducer()
-            producer.send(ProducerRecord(topic1, "Hello this is a test test test"))
-            producer.send(ProducerRecord(topic2, "This is also also a test test something"))
-            producer.close()
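+            // Expected word counts per topic for the two messages produced below.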
+            val resultLists = mapOf(
+                topic1 to listOf(
+                    "Hello" X 1,
+                    "this" X 1,
+                    "is" X 1,
+                    "a" X 1,
+                    "test" X 3,
+                ),
+                topic2 to listOf(
+                    "This" X 1,
+                    "is" X 1,
+                    "also" X 2,
+                    "a" X 1,
+                    "test" X 2,
+                    "something" X 1,
+                )
+            )
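+            // Accumulates the (topic, word, count) tuples of every micro-batch.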
+            val data = arrayListOf<List<Tuple3<String, String, Int>>>()
 
             withSparkStreaming(
-                batchDuration = Durations.seconds(2),
+                batchDuration = Durations.milliseconds(1000),
                 appName = "KotlinDirectKafkaWordCount",
-                timeout = 1000L,
+                timeout = 10_000L,
+                master = "local"
             ) {
 
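+                // Send the test messages only after the streaming context has started,
+                // so the newly created direct stream sees them.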
+                setRunAfterStart {
+                    val producer = autoClose(kafka.createStringStringProducer())
+                    producer.send(ProducerRecord(topic1, "Hello this is a test test test"))
+                    producer.send(ProducerRecord(topic2, "This is also also a test test something"))
+                }
+
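+                // The consumer connects to the container's dynamically mapped broker port.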
                 val kafkaParams: Map<String, Serializable> = mapOf(
-                    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG to broker,
+                    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG to "${kafka.host}:${kafka.getMappedPort(KafkaContainer.KAFKA_PORT)}",
                     ConsumerConfig.GROUP_ID_CONFIG to "consumer-group",
                     ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG to StringDeserializer::class.java,
                     ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG to StringDeserializer::class.java,
                 )
-
                 // Create direct kafka stream with brokers and topics
                 val messages: JavaInputDStream<ConsumerRecord<String, String>> = KafkaUtils.createDirectStream(
                     ssc,
-                    LocationStrategies.PreferConsistent(),
+                    LocationStrategies.PreferBrokers(),
                     ConsumerStrategies.Subscribe(setOf(topic1, topic2), kafkaParams),
                 )
 
                 // Get the lines, split them into words, count the words and print
-                val lines = messages.map { it.topic() X it.value() }
-                val words = lines.flatMapValues { it.split(" ").iterator() }
 
-                val wordCounts = words
+                val wordCounts = messages
+                    .map { it.topic() X it.value() }
+                    .flatMapValues { it.split(" ").iterator() }
                     .map { t(it, 1) }
                     .reduceByKey { a: Int, b: Int -> a + b }
                     .map { (tup, counter) -> tup + counter }
 
-                val resultLists = mapOf(
-                    topic1 to listOf(
-                        "Hello" X 1,
-                        "this" X 1,
-                        "is" X 1,
-                        "a" X 1,
-                        "test" X 3,
-                    ),
-                    topic2 to listOf(
-                        "This" X 1,
-                        "is" X 1,
-                        "also" X 2,
-                        "a" X 1,
-                        "test" X 2,
-                        "something" X 1,
-                    )
-                )
 
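+                // Collect each micro-batch to the driver; the assertion runs after the
+                // streaming context has stopped.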
                 wordCounts.foreachRDD { rdd, _ ->
-                    rdd.foreach { (topic, word, count) ->
-                        t(word, count).shouldBeIn(collection = resultLists[topic]!!)
-                    }
+                    data.add(rdd.collect())
                 }
+            }
 
-                wordCounts.print()
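+            // Flatten the expected counts to (topic, word, count) tuples and check
+            // that every one of them was observed.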
+            val resultList = resultLists.flatMap { (topic, tuples) ->
+                tuples.map { it.prependedBy(topic) }
             }
+            data.flatten() shouldContainAll resultList
         }
-
     }
-})
+}