Skip to content

Commit 8dc0b0f

Browse files
committed
added sorting functions and tests
1 parent 5f7536c commit 8dc0b0f

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Dataset.kt

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,30 @@ inline fun <reified L : Any?, reified R : Any?> Dataset<L>.fullJoin(
309309
*/
310310
inline fun <reified T> Dataset<T>.sort(columns: (Dataset<T>) -> Array<Column>): Dataset<T> {
    // Let the caller pick the sort columns from this dataset, then hand them to the vararg overload.
    val sortColumns = columns(this)
    return sort(*sortColumns)
}
311311

312+
/**
 * Sorts this dataset of [Tuple2] by the `_1` column, i.e. the first component of each tuple.
 *
 * Delegates to the name-based `sort` overload of [Dataset], which orders ascending.
 *
 * @return the dataset produced by sorting on `_1`.
 */
@JvmName("sortByTuple2Key")
fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByKey(): Dataset<Tuple2<T1, T2>> {
    return this.sort("_1")
}
315+
316+
/**
 * Sorts this dataset of [Tuple2] by the `_2` column, i.e. the second component of each tuple.
 *
 * Delegates to the name-based `sort` overload of [Dataset], which orders ascending.
 *
 * @return the dataset produced by sorting on `_2`.
 */
@JvmName("sortByTuple2Value")
fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByValue(): Dataset<Tuple2<T1, T2>> {
    return this.sort("_2")
}
319+
320+
/**
 * Sorts this dataset of [Arity2] by the `_1` column, i.e. the first component of each element.
 *
 * Delegates to the name-based `sort` overload of [Dataset], which orders ascending.
 *
 * @return the dataset produced by sorting on `_1`.
 */
@JvmName("sortByArity2Key")
fun <T1, T2> Dataset<Arity2<T1, T2>>.sortByKey(): Dataset<Arity2<T1, T2>> {
    return this.sort("_1")
}
323+
324+
/**
 * Sorts this dataset of [Arity2] by the `_2` column, i.e. the second component of each element.
 *
 * Delegates to the name-based `sort` overload of [Dataset], which orders ascending.
 *
 * @return the dataset produced by sorting on `_2`.
 */
@JvmName("sortByArity2Value")
fun <T1, T2> Dataset<Arity2<T1, T2>>.sortByValue(): Dataset<Arity2<T1, T2>> {
    return this.sort("_2")
}
327+
328+
/**
 * Sorts this dataset of [Pair] by the `first` column, i.e. the first component of each pair.
 *
 * Delegates to the name-based `sort` overload of [Dataset], which orders ascending.
 *
 * @return the dataset produced by sorting on `first`.
 */
@JvmName("sortByPairKey")
fun <T1, T2> Dataset<Pair<T1, T2>>.sortByKey(): Dataset<Pair<T1, T2>> {
    return this.sort("first")
}
331+
332+
/**
 * Sorts this dataset of [Pair] by the `second` column, i.e. the second component of each pair.
 *
 * Delegates to the name-based `sort` overload of [Dataset], which orders ascending.
 *
 * @return the dataset produced by sorting on `second`.
 */
@JvmName("sortByPairValue")
fun <T1, T2> Dataset<Pair<T1, T2>>.sortByValue(): Dataset<Pair<T1, T2>> {
    return this.sort("second")
}
335+
312336
/**
313337
* This function creates block, where one can call any further computations on already cached dataset
314338
* Data will be unpersisted automatically at the end of computation

kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/DatasetFunctionTest.kt

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -282,6 +282,42 @@ class DatasetFunctionTest : ShouldSpec({
282282

283283
dataset6.toList<List<Int>>() shouldBe listOf(listOf(1, 2, 3), listOf(4, 5, 6))
284284
}
285+
286+
// Checks sortByKey()/sortByValue() on a Dataset of Arity2 against the locally sorted list.
should("Sort Arity2 Dataset") {
    // The input is ordered by neither component, so each assertion fails
    // if the corresponding sort is a no-op (keys: 2,1,3 / values: 5,6,4).
    val list = listOf(
        c(2, 5),
        c(1, 6),
        c(3, 4),
    )
    val dataset = list.toDS()

    dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it._1 }
    dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it._2 }
}
297+
298+
// Checks sortByKey()/sortByValue() on a Dataset of Tuple2 against the locally sorted list.
should("Sort Tuple2 Dataset") {
    // The input is ordered by neither component, so each assertion fails
    // if the corresponding sort is a no-op (keys: 2,1,3 / values: 5,6,4).
    val list = listOf(
        Tuple2(2, 5),
        Tuple2(1, 6),
        Tuple2(3, 4),
    )
    val dataset = list.toDS()

    dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it._1 }
    dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it._2 }
}
309+
310+
// Checks sortByKey()/sortByValue() on a Dataset of Pair against the locally sorted list.
should("Sort Pair Dataset") {
    // The input is ordered by neither component, so each assertion fails
    // if the corresponding sort is a no-op (keys: 2,1,3 / values: 5,6,4).
    val list = listOf(
        Pair(2, 5),
        Pair(1, 6),
        Pair(3, 4),
    )
    val dataset = list.toDS()

    dataset.sortByKey().collectAsList() shouldBe list.sortedBy { it.first }
    dataset.sortByValue().collectAsList() shouldBe list.sortedBy { it.second }
}
285321
}
286322
}
287323

@@ -401,6 +437,7 @@ class DatasetFunctionTest : ShouldSpec({
401437
b.count() shouldBe 1
402438
}
403439

440+
404441
}
405442
}
406443
})

0 commit comments

Comments
 (0)