Skip to content

Commit 63bebaf

Browse files
authored
Merge pull request #137 from JetBrains/jupyter-test
Adds Kotlin Jupyter notebook support
2 parents 5a1fa5a + e8f4ee4 commit 63bebaf

File tree

21 files changed

+1483
-385
lines changed

21 files changed

+1483
-385
lines changed

.github/workflows/build.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ jobs:
2626
restore-keys: ${{ runner.os }}-m2
2727
- name: Build with Maven
2828
run: ./mvnw -B package --file pom.xml -Pscala-2.12 -Dkotest.tags="!Kafka"
29-
qodana:
30-
runs-on: ubuntu-latest
31-
steps:
32-
- uses: actions/checkout@v3
33-
- name: 'Qodana Scan'
34-
uses: JetBrains/[email protected]
29+
# qodana:
30+
# runs-on: ubuntu-latest
31+
# steps:
32+
# - uses: actions/checkout@v3
33+
# - name: 'Qodana Scan'
34+
# uses: JetBrains/[email protected]
3535

3636
# vim: ts=2:sts=2:sw=2:expandtab
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: Generate and publish docs
2+
3+
on:
4+
push:
5+
branches:
6+
- "spark-3.2"
7+
8+
jobs:
9+
build-and-deploy:
10+
runs-on: ubuntu-latest
11+
12+
steps:
13+
- uses: actions/checkout@v2
14+
- name: Set up JDK 11
15+
uses: actions/setup-java@v1
16+
with:
17+
distributions: adopt
18+
java-version: 11
19+
check-latest: true
20+
- name: Cache Maven packages
21+
uses: actions/cache@v2
22+
with:
23+
path: ~/.m2
24+
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
25+
restore-keys: ${{ runner.os }}-m2
26+
- name: Deploy to GH Packages
27+
run: ./mvnw --batch-mode deploy -Dkotest.tags="!Kafka"
28+
env:
29+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
30+
31+

README.md

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,32 @@ Once you have configured the dependency, you only need to add the following impo
7373
import org.jetbrains.kotlinx.spark.api.*
7474
```
7575

76+
### Jupyter
77+
78+
The Kotlin Spark API also supports Kotlin Jupyter notebooks.
79+
To use it, simply add
80+
81+
```jupyterpython
82+
%use kotlin-spark-api
83+
```
84+
to the top of your notebook. This will get the latest version of the API, together with the latest version of Spark.
85+
To use a specific version of Spark or of the API itself, specify it like this:
86+
```jupyterpython
87+
%use kotlin-spark-api(spark=3.2, version=1.0.4)
88+
```
89+
90+
Inside the notebook a Spark session will be initiated automatically. This can be accessed via the `spark` value.
91+
`sc: JavaSparkContext` can also be accessed directly. Apart from that, the API works much the same as it does outside a notebook.
92+
93+
There is also support for HTML rendering of Datasets and simple (Java)RDDs.
94+
95+
To use Spark Streaming capabilities, use this instead:
96+
```jupyterpython
97+
%use kotlin-spark-api-streaming
98+
```
99+
This does not start a Spark session right away, meaning you can call `withSparkStreaming(batchDuration) {}`
100+
in whichever cell you want.
101+
76102
## Kotlin for Apache Spark features
77103

78104
### Creating a SparkSession in Kotlin
@@ -81,12 +107,13 @@ val spark = SparkSession
81107
.builder()
82108
.master("local[2]")
83109
.appName("Simple Application").orCreate
84-
85110
```
86111

112+
This is not needed when running the Kotlin Spark API from a Jupyter notebook.
113+
87114
### Creating a Dataset in Kotlin
88115
```kotlin
89-
spark.toDS("a" to 1, "b" to 2)
116+
spark.dsOf("a" to 1, "b" to 2)
90117
```
91118
The example above produces `Dataset<Pair<String, Int>>`. While Kotlin Pairs and Triples are supported, Scala Tuples are recommended for better support.
92119

@@ -102,6 +129,8 @@ We provide you with useful function `withSpark`, which accepts everything that m
102129

103130
After the work block ends, `spark.stop()` is called automatically.
104131

132+
Do not use this when running the Kotlin Spark API from a Jupyter notebook.
133+
105134
```kotlin
106135
withSpark {
107136
dsOf(1, 2)

core/3.2/pom_2.12.xml

Lines changed: 76 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,81 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3-
<modelVersion>4.0.0</modelVersion>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
4+
<modelVersion>4.0.0</modelVersion>
45

5-
<name>Kotlin Spark API: Scala core for Spark 3.2+ (Scala 2.12)</name>
6-
<description>Scala-Spark 3.2+ compatibility layer for Kotlin for Apache Spark</description>
7-
<artifactId>core-3.2_2.12</artifactId>
8-
<parent>
9-
<groupId>org.jetbrains.kotlinx.spark</groupId>
10-
<artifactId>kotlin-spark-api-parent_2.12</artifactId>
11-
<version>1.0.4-SNAPSHOT</version>
12-
<relativePath>../../pom_2.12.xml</relativePath>
13-
</parent>
6+
<name>Kotlin Spark API: Scala core for Spark 3.2+ (Scala 2.12)</name>
7+
<description>Scala-Spark 3.2+ compatibility layer for Kotlin for Apache Spark</description>
8+
<artifactId>core-3.2_2.12</artifactId>
9+
<parent>
10+
<groupId>org.jetbrains.kotlinx.spark</groupId>
11+
<artifactId>kotlin-spark-api-parent_2.12</artifactId>
12+
<version>1.0.4-SNAPSHOT</version>
13+
<relativePath>../../pom_2.12.xml</relativePath>
14+
</parent>
15+
16+
<dependencies>
17+
<dependency>
18+
<groupId>org.scala-lang</groupId>
19+
<artifactId>scala-library</artifactId>
20+
<version>${scala.version}</version>
21+
</dependency>
22+
<dependency>
23+
<groupId>org.jetbrains.kotlin</groupId>
24+
<artifactId>kotlin-reflect</artifactId>
25+
</dependency>
26+
27+
<!-- Provided dependencies -->
28+
29+
<dependency>
30+
<groupId>org.apache.spark</groupId>
31+
<artifactId>spark-sql_${scala.compat.version}</artifactId>
32+
<version>${spark3.version}</version>
33+
<scope>provided</scope>
34+
</dependency>
35+
36+
</dependencies>
37+
38+
<build>
39+
<sourceDirectory>src/main/scala</sourceDirectory>
40+
<testSourceDirectory>src/test/scala</testSourceDirectory>
41+
<directory>target/${scala.compat.version}</directory>
42+
<plugins>
43+
<plugin>
44+
<groupId>net.alchim31.maven</groupId>
45+
<artifactId>scala-maven-plugin</artifactId>
46+
<version>${scala-maven-plugin.version}</version>
47+
<executions>
48+
<execution>
49+
<id>compile</id>
50+
<goals>
51+
<goal>compile</goal>
52+
<goal>testCompile</goal>
53+
</goals>
54+
<configuration>
55+
<args>
56+
<arg>-dependencyfile</arg>
57+
<arg>${project.build.directory}/.scala_dependencies</arg>
58+
</args>
59+
</configuration>
60+
</execution>
61+
<execution>
62+
<id>docjar</id>
63+
<goals>
64+
<goal>doc-jar</goal>
65+
</goals>
66+
<phase>pre-integration-test</phase>
67+
</execution>
68+
</executions>
69+
</plugin>
70+
<plugin>
71+
<groupId>org.apache.maven.plugins</groupId>
72+
<artifactId>maven-site-plugin</artifactId>
73+
<configuration>
74+
<skip>true</skip>
75+
</configuration>
76+
</plugin>
77+
</plugins>
78+
</build>
1479

15-
<dependencies>
16-
<dependency>
17-
<groupId>org.scala-lang</groupId>
18-
<artifactId>scala-library</artifactId>
19-
<version>${scala.version}</version>
20-
</dependency>
21-
<dependency>
22-
<groupId>org.jetbrains.kotlin</groupId>
23-
<artifactId>kotlin-reflect</artifactId>
24-
</dependency>
25-
<!-- Provided dependencies -->
26-
<dependency>
27-
<groupId>org.apache.spark</groupId>
28-
<artifactId>spark-sql_${scala.compat.version}</artifactId>
29-
<version>${spark3.version}</version>
30-
<scope>provided</scope>
31-
</dependency>
32-
</dependencies>
3380

34-
<build>
35-
<sourceDirectory>src/main/scala</sourceDirectory>
36-
<testSourceDirectory>src/test/scala</testSourceDirectory>
37-
<directory>target/${scala.compat.version}</directory>
38-
<plugins>
39-
<plugin>
40-
<groupId>net.alchim31.maven</groupId>
41-
<artifactId>scala-maven-plugin</artifactId>
42-
<version>${scala-maven-plugin.version}</version>
43-
<executions>
44-
<execution>
45-
<id>compile</id>
46-
<goals>
47-
<goal>compile</goal>
48-
<goal>testCompile</goal>
49-
</goals>
50-
<configuration>
51-
<args>
52-
<arg>-dependencyfile</arg>
53-
<arg>${project.build.directory}/.scala_dependencies</arg>
54-
</args>
55-
</configuration>
56-
</execution>
57-
<execution>
58-
<id>docjar</id>
59-
<goals>
60-
<goal>doc-jar</goal>
61-
</goals>
62-
<phase>pre-integration-test</phase>
63-
</execution>
64-
</executions>
65-
</plugin>
66-
<plugin>
67-
<groupId>org.apache.maven.plugins</groupId>
68-
<artifactId>maven-site-plugin</artifactId>
69-
<configuration>
70-
<skip>true</skip>
71-
</configuration>
72-
</plugin>
73-
</plugins>
74-
</build>
7581
</project>

0 commit comments

Comments
 (0)