Skip to content

Commit 3ce53d2

Browse files
authored
Merge pull request #452 from Kotlin/asGroupBy-emptyDataFrame
Public `empty(DataFrameSchema)` API
2 parents cf23bfa + a4490b2 commit 3ce53d2

File tree

6 files changed

+80
-0
lines changed

6 files changed

+80
-0
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
1616
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
1717
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
1818
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
19+
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
1920
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
21+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
2022
import kotlin.reflect.KType
2123

2224
/**
@@ -32,7 +34,18 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
3234
public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0)
3335
public fun empty(nrow: Int = 0): AnyFrame = if (nrow == 0) Empty else DataFrameImpl<Unit>(emptyList(), nrow)
3436

37+
/**
38+
* Creates a DataFrame with empty columns (rows = 0).
39+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...).
40+
*
41+
*/
3542
public inline fun <reified T> emptyOf(): DataFrame<T> = createEmptyDataFrameOf(T::class).cast()
43+
44+
/**
45+
* Creates a DataFrame with empty columns (rows = 0).
46+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...).
47+
*/
48+
public fun empty(schema: DataFrameSchema): AnyFrame = schema.createEmptyDataFrame()
3649
}
3750

3851
// region columns

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ import org.jetbrains.kotlinx.dataframe.ncol
2020
import org.jetbrains.kotlinx.dataframe.nrow
2121
import org.jetbrains.kotlinx.dataframe.values
2222

23+
/**
24+
* @property df The DataFrame that contains the [groups] column and the key columns; it represents the GroupBy.
25+
*/
2326
internal class GroupByImpl<T, G>(
2427
val df: DataFrame<T>,
2528
override val groups: FrameColumn<G>,

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,4 +2383,28 @@ class DataFrameTests : BaseTest() {
23832383
val newName by column<String>()
23842384
typed.select { name into newName and age }.columnNames() shouldBe listOf("newName", "age")
23852385
}
2386+
2387+
@Test
2388+
fun `api for creating GroupBy with empty groups which can be aggregated using statistics`() {
2389+
val df1 = dataFrameOf("a", "b")(1, "c")
2390+
val df2 = DataFrame.empty()
2391+
val groupBy = dataFrameOf(columnOf("group1", "group2") named "group", columnOf(df1, df2)).asGroupBy()
2392+
2393+
val exception = shouldThrow<IllegalStateException> {
2394+
groupBy.aggregate {
2395+
sum("a")
2396+
}
2397+
}
2398+
2399+
exception.message shouldBe "Column [a] not found"
2400+
2401+
val groupBy1 = groupBy
2402+
.updateGroups { if (it.isEmpty()) DataFrame.empty(groupBy.groups.schema.value) else it }
2403+
2404+
val res = groupBy1.aggregate {
2405+
sum("a")
2406+
}
2407+
2408+
res["aggregated"].values() shouldBe listOf(1, 0)
2409+
}
23862410
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataFrame.kt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
1616
import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl
1717
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
1818
import org.jetbrains.kotlinx.dataframe.impl.headPlusIterable
19+
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame
1920
import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrameOf
21+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
2022
import kotlin.reflect.KType
2123

2224
/**
@@ -32,7 +34,18 @@ public interface DataFrame<out T> : Aggregatable<T>, ColumnsContainer<T> {
3234
public val Empty: AnyFrame = DataFrameImpl<Unit>(emptyList(), 0)
3335
public fun empty(nrow: Int = 0): AnyFrame = if (nrow == 0) Empty else DataFrameImpl<Unit>(emptyList(), nrow)
3436

37+
/**
38+
* Creates a DataFrame with empty columns (rows = 0).
39+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...).
40+
*
41+
*/
3542
public inline fun <reified T> emptyOf(): DataFrame<T> = createEmptyDataFrameOf(T::class).cast()
43+
44+
/**
45+
* Creates a DataFrame with empty columns (rows = 0).
46+
* Can be used as a "null object" in aggregation operations and in operations that work on columns (select, reorder, ...).
47+
*/
48+
public fun empty(schema: DataFrameSchema): AnyFrame = schema.createEmptyDataFrame()
3649
}
3750

3851
// region columns

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ import org.jetbrains.kotlinx.dataframe.ncol
2020
import org.jetbrains.kotlinx.dataframe.nrow
2121
import org.jetbrains.kotlinx.dataframe.values
2222

23+
/**
24+
* @property df The DataFrame that contains the [groups] column and the key columns; it represents the GroupBy.
25+
*/
2326
internal class GroupByImpl<T, G>(
2427
val df: DataFrame<T>,
2528
override val groups: FrameColumn<G>,

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,4 +2383,28 @@ class DataFrameTests : BaseTest() {
23832383
val newName by column<String>()
23842384
typed.select { name into newName and age }.columnNames() shouldBe listOf("newName", "age")
23852385
}
2386+
2387+
@Test
2388+
fun `api for creating GroupBy with empty groups which can be aggregated using statistics`() {
2389+
val df1 = dataFrameOf("a", "b")(1, "c")
2390+
val df2 = DataFrame.empty()
2391+
val groupBy = dataFrameOf(columnOf("group1", "group2") named "group", columnOf(df1, df2)).asGroupBy()
2392+
2393+
val exception = shouldThrow<IllegalStateException> {
2394+
groupBy.aggregate {
2395+
sum("a")
2396+
}
2397+
}
2398+
2399+
exception.message shouldBe "Column [a] not found"
2400+
2401+
val groupBy1 = groupBy
2402+
.updateGroups { if (it.isEmpty()) DataFrame.empty(groupBy.groups.schema.value) else it }
2403+
2404+
val res = groupBy1.aggregate {
2405+
sum("a")
2406+
}
2407+
2408+
res["aggregated"].values() shouldBe listOf(1, 0)
2409+
}
23862410
}

0 commit comments

Comments
 (0)