Skip to content

Commit b121286

Browse files
authored
Merge pull request #1144 from Kotlin/nested-dataframeof-inplace
Add a constructor to create a nested dataframe from columns inplace
2 parents deba694 + 7253950 commit b121286

File tree

13 files changed

+173
-2
lines changed

13 files changed

+173
-2
lines changed

core/api/core.api

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,6 +1387,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/ConstructorsKt {
13871387
public static final fun columnGroupTyped (Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath;)Lorg/jetbrains/kotlinx/dataframe/columns/ColumnAccessor;
13881388
public static final fun columnOf (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
13891389
public static final fun columnOf (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn;
1390+
public static final fun columnOf ([Lkotlin/Pair;)Lorg/jetbrains/kotlinx/dataframe/columns/ColumnGroup;
13901391
public static final fun columnOf ([Lorg/jetbrains/kotlinx/dataframe/DataFrame;)Lorg/jetbrains/kotlinx/dataframe/columns/FrameColumn;
13911392
public static final fun columnOf ([Lorg/jetbrains/kotlinx/dataframe/columns/BaseColumn;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
13921393
public static final fun dataFrameOf (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
@@ -1397,6 +1398,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/ConstructorsKt {
13971398
public static final fun dataFrameOf ([Lkotlin/Pair;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
13981399
public static final fun dataFrameOf ([Lorg/jetbrains/kotlinx/dataframe/columns/BaseColumn;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
13991400
public static final fun dataFrameOf ([Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)Lorg/jetbrains/kotlinx/dataframe/api/DataFrameBuilder;
1401+
public static final fun dataFrameOfColumns ([Lkotlin/Pair;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
14001402
public static final fun emptyDataFrame ()Lorg/jetbrains/kotlinx/dataframe/DataFrame;
14011403
public static final fun frameColumn ()Lorg/jetbrains/kotlinx/dataframe/api/ColumnDelegate;
14021404
public static final fun frameColumn (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/columns/ColumnAccessor;

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
1313
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
1414
import org.jetbrains.kotlinx.dataframe.annotations.Refine
1515
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
16+
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
1617
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
1718
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1819
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
@@ -269,6 +270,15 @@ public inline fun <reified T> column(values: Iterable<T>): DataColumn<T> =
269270
allColsMakesColGroup = true,
270271
).forceResolve()
271272

273+
/**
 * Builds a [ColumnGroup] from `name to column` pairs.
 *
 * Every column is renamed to the name it is paired with, the renamed columns are passed to
 * [dataFrameOf], and the resulting data frame is converted with `asColumnGroup()`.
 *
 * @param columns pairs of a column name and the column to store under that name.
 * @return the data frame built from the renamed columns, viewed as a [ColumnGroup].
 */
@Refine
274+
@Interpretable("ColumnOfPairs")
275+
public fun columnOf(vararg columns: Pair<String, AnyBaseCol>): ColumnGroup<*> =
276+
dataFrameOf(
277+
columns.map { (name, col) ->
278+
col.rename(name)
279+
},
280+
).asColumnGroup()
281+
272282
// endregion
273283

274284
// region create DataFrame
@@ -290,6 +300,12 @@ public fun dataFrameOf(columns: Iterable<AnyBaseCol>): DataFrame<*> {
290300
return DataFrameImpl<Unit>(cols, nrow)
291301
}
292302

303+
/**
 * Builds a [DataFrame] from `name to column` pairs.
 *
 * Every column is renamed to the name it is paired with and the renamed columns are passed to
 * the `dataFrameOf` overload that accepts a collection of columns.
 *
 * The function is exposed to the JVM as `dataFrameOfColumns`.
 * NOTE(review): presumably this avoids a JVM signature clash with another vararg-`Pair`
 * overload of `dataFrameOf` - confirm.
 *
 * @param columns pairs of a column name and the column to store under that name.
 * @return the data frame consisting of the renamed columns.
 */
@Refine
304+
@JvmName("dataFrameOfColumns")
305+
@Interpretable("DataFrameOfPairs")
306+
public fun dataFrameOf(vararg columns: Pair<String, AnyBaseCol>): DataFrame<*> =
307+
dataFrameOf(columns.map { (name, col) -> col.rename(name) })
308+
293309
public fun dataFrameOf(vararg header: ColumnReference<*>): DataFrameBuilder = DataFrameBuilder(header.map { it.name() })
294310

295311
public fun dataFrameOf(vararg columns: AnyBaseCol): DataFrame<*> = dataFrameOf(columns.asIterable())

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,21 @@ class Create : TestBase() {
261261
// SampleEnd
262262
}
263263

264+
// Documentation sample: the code between SampleStart and SampleEnd is the snippet shown in
// createDataFrame.md under the `<!---FUN createNestedDataFrameInplace-->` marker.
// The test makes no assertions; it only builds the data frame.
@Test
265+
@TransformDataFrameExpressions
266+
fun createNestedDataFrameInplace() {
267+
// SampleStart
268+
// DataFrame with 2 columns and 3 rows
269+
val df = dataFrameOf(
270+
"name" to columnOf(
271+
"firstName" to columnOf("Alice", "Bob", "Charlie"),
272+
"lastName" to columnOf("Cooper", "Dylan", "Daniels"),
273+
),
274+
"age" to columnOf(15, 20, 100),
275+
)
276+
// SampleEnd
277+
}
278+
264279
@Test
265280
@TransformDataFrameExpressions
266281
fun createDataFrameWithFill() {

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1934,6 +1934,21 @@ class DataFrameTests : BaseTest() {
19341934
df.columns().forEach { col -> col.forEachIndexed { row, value -> value shouldBe row + 1 } }
19351935
}
19361936

1937+
// Builds a data frame from `name to column` pairs that mix a plain value column ("a"),
// a group created with the pair-based columnOf ("b"), a column holding a data frame ("d")
// and a column group obtained through asColumnGroup() ("gr").
@Test
1938+
fun `create nested dataframe inplace`() {
1939+
val df = dataFrameOf(
1940+
"a" to columnOf("1"),
1941+
"b" to columnOf(
1942+
"c" to columnOf("2"),
1943+
),
1944+
"d" to columnOf(dataFrameOf("a")(123)),
1945+
"gr" to listOf("1").toDataFrame().asColumnGroup(),
1946+
)
1947+
1948+
// Top-level column names are the names given in the pairs, in declaration order.
df.columnNames() shouldBe listOf("a", "b", "d", "gr")
1949+
// The group passed as "gr" keeps its inner "value" column and its data.
df.getColumnGroup("gr")["value"].values() shouldBe listOf("1")
1950+
}
1951+
19371952
@Test
19381953
fun `get typed column by name`() {
19391954
val col = df.getColumn("name").cast<String>()

docs/StardustDocs/topics/createDataFrame.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,23 @@ val df = dataFrameOf(
4444

4545
<!---END-->
4646

47+
Create a DataFrame with nested columns in place:
48+
49+
<!---FUN createNestedDataFrameInplace-->
50+
51+
```kotlin
52+
// DataFrame with 2 columns and 3 rows
53+
val df = dataFrameOf(
54+
"name" to columnOf(
55+
"firstName" to columnOf("Alice", "Bob", "Charlie"),
56+
"lastName" to columnOf("Cooper", "Dylan", "Daniels"),
57+
),
58+
"age" to columnOf(15, 20, 100),
59+
)
60+
```
61+
62+
<!---END-->
63+
4764
<!---FUN createDataFrameFromColumns-->
4865

4966
```kotlin

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/extensions/FunctionCallTransformer.kt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,13 @@ class FunctionCallTransformer(
101101
fun transformOrNull(call: FirFunctionCall, originalSymbol: FirNamedFunctionSymbol): FirFunctionCall?
102102
}
103103

104-
private val transformers = listOf(GroupByCallTransformer(), DataFrameCallTransformer(), DataRowCallTransformer())
104+
// When a transformer is added to this list, also update
// [ReturnTypeBasedReceiverInjector.SCHEMA_TYPES] so that both stay in sync.
105+
private val transformers = listOf(
106+
GroupByCallTransformer(),
107+
DataFrameCallTransformer(),
108+
DataRowCallTransformer(),
109+
ColumnGroupCallTransformer(),
110+
)
105111

106112
override fun intercept(callInfo: CallInfo, symbol: FirNamedFunctionSymbol): CallReturnType? {
107113
val callSiteAnnotations = (callInfo.callSite as? FirAnnotationContainer)?.annotations ?: emptyList()
@@ -194,6 +200,8 @@ class FunctionCallTransformer(
194200

195201
inner class DataRowCallTransformer : CallTransformer by DataSchemaLikeCallTransformer(Names.DATA_ROW_CLASS_ID)
196202

203+
// Call transformer for ColumnGroup return types; like DataRowCallTransformer it delegates
// everything to DataSchemaLikeCallTransformer, parameterized with the ColumnGroup class id.
inner class ColumnGroupCallTransformer : CallTransformer by DataSchemaLikeCallTransformer(Names.COLUM_GROUP_CLASS_ID)
204+
197205
inner class GroupByCallTransformer : CallTransformer {
198206
override fun interceptOrNull(
199207
callInfo: CallInfo,

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/extensions/ReturnTypeBasedReceiverInjector.kt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,19 @@ import org.jetbrains.kotlin.fir.types.toRegularClassSymbol
1616
import org.jetbrains.kotlinx.dataframe.plugin.utils.Names
1717

1818
class ReturnTypeBasedReceiverInjector(session: FirSession) : FirExpressionResolutionExtension(session) {
19+
companion object {
20+
// Class ids of the call return types for which addNewImplicitReceivers inspects the
// type arguments. A comment in FunctionCallTransformer asks to update this set whenever
// its list of call transformers changes.
private val SCHEMA_TYPES = setOf(
21+
Names.DF_CLASS_ID,
22+
Names.GROUP_BY_CLASS_ID,
23+
Names.DATA_ROW_CLASS_ID,
24+
Names.COLUM_GROUP_CLASS_ID,
25+
)
26+
}
27+
1928
@OptIn(SymbolInternals::class)
2029
override fun addNewImplicitReceivers(functionCall: FirFunctionCall): List<ConeKotlinType> {
2130
val callReturnType = functionCall.resolvedType
22-
return if (callReturnType.classId in setOf(Names.DF_CLASS_ID, Names.GROUP_BY_CLASS_ID, Names.DATA_ROW_CLASS_ID)) {
31+
return if (callReturnType.classId in SCHEMA_TYPES) {
2332
val typeArguments = callReturnType.typeArguments
2433
typeArguments
2534
.mapNotNull {

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/dataFrameOf.kt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ package org.jetbrains.kotlinx.dataframe.plugin.impl.api
44
import org.jetbrains.kotlin.fir.expressions.FirExpression
55
import org.jetbrains.kotlin.fir.expressions.FirLiteralExpression
66
import org.jetbrains.kotlin.fir.expressions.FirVarargArgumentsExpression
7+
import org.jetbrains.kotlin.fir.plugin.createConeType
8+
import org.jetbrains.kotlin.fir.types.ConeKotlinType
9+
import org.jetbrains.kotlin.fir.types.classId
710
import org.jetbrains.kotlin.fir.types.commonSuperTypeOrNull
811
import org.jetbrains.kotlin.fir.types.resolvedType
912
import org.jetbrains.kotlin.fir.types.type
@@ -15,6 +18,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.Interpreter
1518
import org.jetbrains.kotlinx.dataframe.plugin.impl.PluginDataFrameSchema
1619
import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf
1720
import org.jetbrains.kotlinx.dataframe.impl.api.withValuesImpl
21+
import org.jetbrains.kotlinx.dataframe.plugin.utils.Names
1822

1923
class DataFrameOf0 : AbstractInterpreter<DataFrameBuilderApproximation>() {
2024
val Arguments.header: List<String> by arg()
@@ -53,3 +57,30 @@ class DataFrameOf3 : AbstractSchemaModificationInterpreter() {
5357
return PluginDataFrameSchema(res)
5458
}
5559
}
60+
61+
/**
 * Shared interpreter for constructor functions that take `name to column` pairs.
 *
 * It produces a [PluginDataFrameSchema] with one column per pair. The column name is read from
 * the pair's first component and the column type is derived from the resolved type of the
 * second component. If any pair cannot be interpreted, the result is an empty schema.
 */
abstract class SchemaConstructor : AbstractSchemaModificationInterpreter() {
62+
// The interpreted vararg of pairs passed to the constructor call.
val Arguments.columns: List<Interpreter.Success<Pair<*, *>>> by arg()
63+
64+
override fun Arguments.interpret(): PluginDataFrameSchema {
65+
val res = columns.map {
66+
// Unwrap the interpreted pair.
// NOTE(review): this local shadows the lambda's implicit `it`; a named lambda parameter
// would read more clearly.
val it = it.value
67+
// The name is available only when the first component is a literal expression holding a String.
val name = (it.first as? FirLiteralExpression)?.value as? String
68+
val resolvedType = (it.second as? FirExpression)?.resolvedType
69+
// Derive the schema column type from the kind of column expression and its first type argument.
val type: ConeKotlinType? = when (resolvedType?.classId) {
70+
// Column group: type created from DATA_ROW_CLASS_ID with the group's type argument.
Names.COLUM_GROUP_CLASS_ID -> Names.DATA_ROW_CLASS_ID.createConeType(session, arrayOf(resolvedType.typeArguments[0]))
71+
// Frame column: type created from DF_CLASS_ID with the column's type argument.
Names.FRAME_COLUMN_CLASS_ID -> Names.DF_CLASS_ID.createConeType(session, arrayOf(resolvedType.typeArguments[0]))
72+
// DataColumn / BaseColumn: the type argument itself, or null when it is not a ConeKotlinType.
Names.DATA_COLUMN_CLASS_ID -> resolvedType.typeArguments[0] as? ConeKotlinType
73+
Names.BASE_COLUMN_CLASS_ID -> resolvedType.typeArguments[0] as? ConeKotlinType
74+
// Any other (or unresolved) type is not supported.
else -> null
75+
}
76+
// Non-local return from interpret(): one uninterpretable pair yields an empty schema for the whole call.
if (name == null || type == null) return PluginDataFrameSchema(emptyList())
77+
simpleColumnOf(name, type)
78+
}
79+
return PluginDataFrameSchema(res)
80+
}
81+
}
82+
83+
/** Interpreter registered under the "DataFrameOfPairs" key; all behavior comes from [SchemaConstructor]. */
class DataFrameOfPairs : SchemaConstructor()
84+
85+
/** Interpreter registered under the "ColumnOfPairs" key; all behavior comes from [SchemaConstructor]. */
class ColumnOfPairs : SchemaConstructor()
86+

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,13 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColsAtAnyDepth2
9696
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColsOf0
9797
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColsOf1
9898
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColsOf2
99+
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColumnOfPairs
99100
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ColumnRange
100101
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.ConcatWithKeys
101102
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameBuilderInvoke0
102103
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf0
103104
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf3
105+
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOfPairs
104106
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameUnfold
105107
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameXs
106108
import org.jetbrains.kotlinx.dataframe.plugin.impl.api.Drop0
@@ -411,6 +413,8 @@ internal inline fun <reified T> String.load(): T {
411413
"toDataFrameDefault" -> ToDataFrameDefault()
412414
"ToDataFrameDslStringInvoke" -> ToDataFrameDslStringInvoke()
413415
"DataFrameOf0" -> DataFrameOf0()
416+
"DataFrameOfPairs" -> DataFrameOfPairs()
417+
"ColumnOfPairs" -> ColumnOfPairs()
414418
"DataFrameBuilderInvoke0" -> DataFrameBuilderInvoke0()
415419
"ToDataFrameColumn" -> ToDataFrameColumn()
416420
"FillNulls0" -> FillNulls0()

plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/utils/Names.kt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,18 @@ object Names {
3131

3232
val COLUM_GROUP_CLASS_ID: ClassId
3333
get() = ClassId(FqName("org.jetbrains.kotlinx.dataframe.columns"), Name.identifier("ColumnGroup"))
34+
// Class id of org.jetbrains.kotlinx.dataframe.columns.FrameColumn; the getter builds a new ClassId on each access.
val FRAME_COLUMN_CLASS_ID: ClassId
35+
get() = ClassId(FqName("org.jetbrains.kotlinx.dataframe.columns"), Name.identifier("FrameColumn"))
3436
val DATA_COLUMN_CLASS_ID: ClassId
3537
get() = ClassId(
3638
FqName.fromSegments(listOf("org", "jetbrains", "kotlinx", "dataframe")),
3739
Name.identifier("DataColumn")
3840
)
41+
// Class id of org.jetbrains.kotlinx.dataframe.columns.BaseColumn; the getter builds a new ClassId on each access.
val BASE_COLUMN_CLASS_ID: ClassId
42+
get() = ClassId(
43+
FqName.fromSegments(listOf("org", "jetbrains", "kotlinx", "dataframe", "columns")),
44+
Name.identifier("BaseColumn")
45+
)
3946
val COLUMNS_CONTAINER_CLASS_ID: ClassId
4047
get() = ClassId(
4148
FqName.fromSegments(listOf("org", "jetbrains", "kotlinx", "dataframe")),
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import org.jetbrains.kotlinx.dataframe.*
2+
import org.jetbrains.kotlinx.dataframe.annotations.*
3+
import org.jetbrains.kotlinx.dataframe.api.*
4+
import org.jetbrains.kotlinx.dataframe.io.*
5+
6+
// Box test for the pair-based columnOf: builds a group with a String column "c" and an Int column "d".
fun box(): String {
7+
val group = columnOf(
8+
"c" to columnOf("2"),
9+
"d" to columnOf(123),
10+
)
11+
// The explicit types below compile only if `group.c` / `group.d` resolve to typed columns.
// NOTE(review): presumably these accessors come from the compiler plugin - confirm.
val str: DataColumn<String> = group.c
12+
val i: DataColumn<Int> = group.d
13+
14+
return "OK"
15+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import org.jetbrains.kotlinx.dataframe.*
2+
import org.jetbrains.kotlinx.dataframe.annotations.*
3+
import org.jetbrains.kotlinx.dataframe.api.*
4+
import org.jetbrains.kotlinx.dataframe.io.*
5+
6+
// Box test for the pair-based dataFrameOf: mixes a value column, a nested columnOf group,
// a column holding a data frame and a column group obtained through asColumnGroup().
fun box(): String {
7+
val df = dataFrameOf(
8+
"a" to columnOf("1"),
9+
"b" to columnOf(
10+
"c" to columnOf("2"),
11+
),
12+
"d" to columnOf(dataFrameOf("a")(123)),
13+
"gr" to listOf("1").toDataFrame().asColumnGroup(),
14+
)
15+
// The explicit types below compile only if each access path resolves to a typed column.
// NOTE(review): presumably these accessors come from the compiler plugin - confirm.
val str: DataColumn<String> = df.a
16+
val str1: DataColumn<String> = df.b.c
17+
val i: DataColumn<Int> = df.d[0].a
18+
val str2: DataColumn<String> = df.gr.value
19+
return "OK"
20+
}

plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ public void testColumnName_invalidSymbol() {
7777
runTest("testData/box/columnName_invalidSymbol.kt");
7878
}
7979

80+
// Runs the box test data file testData/box/columnOf_nested.kt.
@Test
81+
@TestMetadata("columnOf_nested.kt")
82+
public void testColumnOf_nested() {
83+
runTest("testData/box/columnOf_nested.kt");
84+
}
85+
8086
@Test
8187
@TestMetadata("columnWithStarProjection.kt")
8288
public void testColumnWithStarProjection() {
@@ -119,6 +125,12 @@ public void testDataFrameOf() {
119125
runTest("testData/box/dataFrameOf.kt");
120126
}
121127

128+
// Runs the box test data file testData/box/dataFrameOf_nested.kt.
@Test
129+
@TestMetadata("dataFrameOf_nested.kt")
130+
public void testDataFrameOf_nested() {
131+
runTest("testData/box/dataFrameOf_nested.kt");
132+
}
133+
122134
@Test
123135
@TestMetadata("dataFrameOf_to.kt")
124136
public void testDataFrameOf_to() {

0 commit comments

Comments
 (0)