Skip to content

Commit cb3a60b

Browse files
committed
Update generated sources with recent changes
1 parent 300ba63 commit cb3a60b

File tree

29 files changed

+725
-148
lines changed

29 files changed

+725
-148
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

Lines changed: 107 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
2020
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
2121
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl
2222
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
23-
import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
23+
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
2424
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
2525
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
2626
import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
2727
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
28+
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN
29+
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_IMPORT
30+
import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_REPLACE
2831
import kotlin.reflect.KClass
2932
import kotlin.reflect.KProperty
3033
import kotlin.reflect.KType
@@ -45,6 +48,9 @@ public interface DataColumn<out T> : BaseColumn<T> {
4548
/**
4649
* Creates [ValueColumn] using given [name], [values] and [type].
4750
*
51+
* Be careful; values are NOT checked to adhere to [type] for efficiency,
52+
* unless you specify [infer].
53+
*
4854
* @param name name of the column
4955
* @param values list of column values
5056
* @param type type of the column
@@ -56,11 +62,20 @@ public interface DataColumn<out T> : BaseColumn<T> {
5662
type: KType,
5763
infer: Infer = Infer.None,
5864
defaultValue: T? = null,
59-
): ValueColumn<T> = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue)
65+
): ValueColumn<T> =
66+
ValueColumnImpl(
67+
values = values,
68+
name = name,
69+
type = getValuesType(values, type, infer),
70+
defaultValue = defaultValue,
71+
)
6072

6173
/**
6274
* Creates [ValueColumn] using given [name], [values] and reified column [type].
6375
*
76+
* Be careful; values are NOT checked to adhere to [type] for efficiency,
77+
* unless you specify [infer].
78+
*
6479
* Note that the column [type] will be defined at compile time using the [T] type argument.
6580
*
6681
* @param T type of the column
@@ -74,33 +89,92 @@ public interface DataColumn<out T> : BaseColumn<T> {
7489
infer: Infer = Infer.None,
7590
): ValueColumn<T> =
7691
createValueColumn(
77-
name,
78-
values,
79-
getValuesType(
80-
values,
81-
typeOf<T>(),
82-
infer,
83-
),
92+
name = name,
93+
values = values,
94+
type = typeOf<T>(),
95+
infer = infer,
8496
)
8597

98+
/**
99+
* Creates [ColumnGroup] using the given [name] and [df] representing the group of columns.
100+
*
101+
* @param name name of the column group
102+
* @param df the collection of columns representing the column group
103+
*/
86104
public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> = ColumnGroupImpl(name, df)
87105

106+
@Deprecated(
107+
message = CREATE_FRAME_COLUMN,
108+
replaceWith = ReplaceWith(CREATE_FRAME_COLUMN_REPLACE, CREATE_FRAME_COLUMN_IMPORT),
109+
level = DeprecationLevel.WARNING,
110+
)
88111
public fun <T> createFrameColumn(name: String, df: DataFrame<T>, startIndices: Iterable<Int>): FrameColumn<T> =
89112
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() })
90113

114+
/**
115+
* Creates [FrameColumn] using the given [name] and list of dataframes [groups].
116+
*
117+
* Be careful; [groups] must be a non-null list of [DataFrames][DataFrame].
118+
* This is NOT checked at runtime for efficiency, nor is the validity of given [schema].
119+
*
120+
* @param name name of the frame column
121+
* @param groups the dataframes to be put in the column
122+
* @param schema an optional (lazily calculated) [DataFrameSchema] representing
123+
* the intersecting schema of [groups]
124+
*/
91125
public fun <T> createFrameColumn(
92126
name: String,
93127
groups: List<DataFrame<T>>,
94128
schema: Lazy<DataFrameSchema>? = null,
95129
): FrameColumn<T> = FrameColumnImpl(name, groups, schema)
96130

131+
/**
132+
* Creates either a [FrameColumn], [ColumnGroup], or [ValueColumn] by analyzing each value in
133+
* [values].
134+
* This is safer but less efficient than the other functions.
135+
*
136+
* Some conversions are done automatically to attempt to unify the values, like:
137+
* - `null` -> [DataFrame.empty][DataFrame.empty]`()` and [DataRow] -> single-row [DataFrame] when there are other
138+
* [DataFrames][DataFrame] present in [values]
139+
* - [List][List]`<`[DataRow][DataRow]`<*>>` -> [DataFrame]
140+
* etc.
141+
*
142+
* @param name name of the column
143+
* @param values the values to represent each row in the column
144+
* @param nullable optionally you can specify whether [values] contains nulls; if `null`, it is inferred.
145+
* @param allColsMakesColGroup if `true`, then, if all values are non-null same-sized columns,
146+
* a column group will be created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`.
147+
*/
97148
public fun <T> createWithTypeInference(
98149
name: String,
99150
values: List<T>,
100151
nullable: Boolean? = null,
101-
): DataColumn<T> = guessColumnType(name, values, nullable = nullable)
152+
allColsMakesColGroup: Boolean = false,
153+
): DataColumn<T> =
154+
createColumnGuessingType(
155+
name = name,
156+
values = values,
157+
nullable = nullable,
158+
allColsMakesColGroup = allColsMakesColGroup,
159+
)
102160

103-
public fun <T> create(
161+
/**
162+
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
163+
* [type].
164+
*
165+
* Be careful; values in [values] are NOT checked to adhere to the given [type], nor
166+
* do we check whether there are nulls among the values when the given type is [DataFrame]
167+
* (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
168+
* When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
169+
*
170+
* This may be unsafe but is more efficient than [createWithTypeInference].
171+
*
172+
* @param name the name of the column
173+
* @param values the values to represent each row in the column
174+
* @param type the (unchecked) common type of [values]
175+
* @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
176+
*/
177+
public fun <T> createUnsafe(
104178
name: String,
105179
values: List<T>,
106180
type: KType,
@@ -112,9 +186,29 @@ public interface DataColumn<out T> : BaseColumn<T> {
112186
ColumnKind.Frame -> createFrameColumn(name, values as List<AnyFrame>).asDataColumn().cast()
113187
}
114188

115-
public inline fun <reified T> create(name: String, values: List<T>, infer: Infer = Infer.None): DataColumn<T> =
116-
create(name, values, typeOf<T>(), infer)
189+
/**
190+
* Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
191+
* type [T].
192+
*
193+
* Be careful; values in [values] are NOT checked to adhere to the given type [T], nor
194+
* do we check whether there are nulls among the values when the given type is [DataFrame]
195+
* (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
196+
* When [T] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
197+
*
198+
* This may be unsafe but is more efficient than [createWithTypeInference].
199+
*
200+
* @param T the (unchecked) common type of [values]
201+
* @param name the name of the column
202+
* @param values the values to represent each row in the column
203+
* @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
204+
*/
205+
public inline fun <reified T> createUnsafe(
206+
name: String,
207+
values: List<T>,
208+
infer: Infer = Infer.None,
209+
): DataColumn<T> = createUnsafe(name, values, typeOf<T>(), infer)
117210

211+
/** Creates an empty [DataColumn] with given [name]. */
118212
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
119213
}
120214

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/TypeConversions.kt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -266,16 +266,22 @@ public enum class Infer {
266266

267267
/**
268268
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type].
269+
*
270+
* This is the most efficient but least safe option.
269271
*/
270272
None,
271273

272274
/**
273-
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type], but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of *null* values.
275+
* Use reified type argument of an inline [DataFrame] operation as [DataColumn.type],
276+
* but compute [DataColumn.hasNulls] by checking column [DataColumn.values] for an actual presence of `null` values.
274277
*/
275278
Nulls,
276279

277280
/**
278-
* Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using optionally provided base type as an upper bound.
281+
* Infer [DataColumn.type] and [DataColumn.hasNulls] from actual [DataColumn.values] using an optionally provided
282+
* base type as an upper bound.
283+
*
284+
* This is the least efficient but safest option.
279285
*/
280286
Type,
281287

@@ -338,17 +344,17 @@ public inline fun <reified T> Iterable<T>.toColumn(name: String = "", infer: Inf
338344
if (infer == Infer.Type) {
339345
DataColumn.createWithTypeInference(name, asList())
340346
} else {
341-
DataColumn.create(name, asList(), typeOf<T>(), infer)
347+
DataColumn.createUnsafe(name, asList(), typeOf<T>(), infer)
342348
}.forceResolve()
343349

344350
public inline fun <reified T> Iterable<*>.toColumnOf(name: String = ""): DataColumn<T> =
345-
DataColumn.create(name, asList() as List<T>, typeOf<T>()).forceResolve()
351+
DataColumn.createUnsafe(name, asList() as List<T>, typeOf<T>()).forceResolve()
346352

347353
public inline fun <reified T> Iterable<T>.toColumn(ref: ColumnReference<T>): DataColumn<T> =
348-
DataColumn.create(ref.name(), asList()).forceResolve()
354+
DataColumn.createUnsafe(ref.name(), asList()).forceResolve()
349355

350356
public inline fun <reified T> Iterable<T>.toColumn(property: KProperty<T>): DataColumn<T> =
351-
DataColumn.create(property.columnName, asList()).forceResolve()
357+
DataColumn.createUnsafe(property.columnName, asList()).forceResolve()
352358

353359
public fun Iterable<String>.toPath(): ColumnPath = ColumnPath(asList())
354360

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@ import org.jetbrains.kotlinx.dataframe.DataRow
66
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
77
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
88
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
9+
import org.jetbrains.kotlinx.dataframe.impl.api.chunkedImpl
910
import org.jetbrains.kotlinx.dataframe.impl.getListType
1011
import org.jetbrains.kotlinx.dataframe.nrow
1112
import org.jetbrains.kotlinx.dataframe.type
1213

14+
/**
15+
* Creates a [FrameColumn] from [this] by splitting the dataframe into
16+
* smaller ones, each with at most [size] rows.
17+
*/
1318
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> {
1419
val startIndices = (0 until nrow step size)
15-
return DataColumn.createFrameColumn(name, this, startIndices)
20+
return this.chunkedImpl(startIndices, name)
1621
}
1722

1823
public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.withValuesImpl
2424
import org.jetbrains.kotlinx.dataframe.impl.asList
2525
import org.jetbrains.kotlinx.dataframe.impl.columnName
2626
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl
27-
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn
27+
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
2828
import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference
2929
import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve
3030
import org.jetbrains.kotlinx.dataframe.impl.columns.unbox
@@ -223,7 +223,13 @@ public class ColumnDelegate<T>(private val parent: ColumnGroupReference? = null)
223223
// region create DataColumn
224224

225225
public inline fun <reified T> columnOf(vararg values: T): DataColumn<T> =
226-
createColumn(values.asIterable(), typeOf<T>(), true).forceResolve()
226+
createColumnGuessingType(
227+
values = values.asIterable(),
228+
suggestedType = typeOf<T>(),
229+
guessTypeWithSuggestedAsUpperbound = true,
230+
listifyValues = false,
231+
allColsMakesColGroup = true,
232+
).forceResolve()
227233

228234
public fun columnOf(vararg values: AnyBaseCol): DataColumn<AnyRow> = columnOf(values.asIterable()).forceResolve()
229235

@@ -244,7 +250,12 @@ public fun <T> columnOf(frames: Iterable<DataFrame<T>>): FrameColumn<T> =
244250
).forceResolve()
245251

246252
public inline fun <reified T> column(values: Iterable<T>): DataColumn<T> =
247-
createColumn(values, typeOf<T>(), false).forceResolve()
253+
createColumnGuessingType(
254+
values = values,
255+
suggestedType = typeOf<T>(),
256+
guessTypeWithSuggestedAsUpperbound = false,
257+
allColsMakesColGroup = true,
258+
).forceResolve()
248259

249260
// endregion
250261

@@ -274,8 +285,10 @@ public fun dataFrameOf(vararg columns: AnyBaseCol): DataFrame<*> = dataFrameOf(c
274285
@Interpretable("DataFrameOf0")
275286
public fun dataFrameOf(vararg header: String): DataFrameBuilder = dataFrameOf(header.toList())
276287

277-
public inline fun <reified C> dataFrameOf(vararg header: String, fill: (String) -> Iterable<C>): DataFrame<*> =
278-
dataFrameOf(header.asIterable(), fill)
288+
public inline fun <reified C> dataFrameOf(
289+
vararg header: String,
290+
crossinline fill: (String) -> Iterable<C>,
291+
): DataFrame<*> = dataFrameOf(header.asIterable()).invoke(fill)
279292

280293
public fun dataFrameOf(header: Iterable<String>): DataFrameBuilder = DataFrameBuilder(header.asList())
281294

@@ -289,9 +302,12 @@ public fun dataFrameOf(header: Iterable<String>, values: Iterable<Any?>): DataFr
289302

290303
public inline fun <T, reified C> dataFrameOf(header: Iterable<T>, fill: (T) -> Iterable<C>): DataFrame<*> =
291304
header.map { value ->
292-
fill(value).asList().let {
293-
DataColumn.create(value.toString(), it)
294-
}
305+
createColumnGuessingType(
306+
name = value.toString(),
307+
values = fill(value).asList(),
308+
suggestedType = typeOf<C>(),
309+
guessTypeWithSuggestedAsUpperbound = true,
310+
)
295311
}.toDataFrame()
296312

297313
public fun dataFrameOf(header: CharProgression): DataFrameBuilder = dataFrameOf(header.map { it.toString() })
@@ -320,16 +336,19 @@ public class DataFrameBuilder(private val header: List<String>) {
320336

321337
public operator fun invoke(args: Sequence<Any?>): DataFrame<*> = invoke(*args.toList().toTypedArray())
322338

323-
public fun withColumns(columnBuilder: (String) -> AnyCol): DataFrame<*> = header.map(columnBuilder).toDataFrame()
339+
public fun withColumns(columnBuilder: (String) -> AnyCol): DataFrame<*> =
340+
header
341+
.map { columnBuilder(it) named it } // create the columns and make sure to rename them to the given header
342+
.toDataFrame()
324343

325344
public inline operator fun <reified T> invoke(crossinline valuesBuilder: (String) -> Iterable<T>): DataFrame<*> =
326345
withColumns { name ->
327-
valuesBuilder(name).let {
328-
DataColumn.create(
329-
name = name,
330-
values = it.asList(),
331-
)
332-
}
346+
createColumnGuessingType(
347+
name = name,
348+
values = valuesBuilder(name).asList(),
349+
suggestedType = typeOf<T>(),
350+
guessTypeWithSuggestedAsUpperbound = true,
351+
)
333352
}
334353

335354
public inline fun <reified C> fill(nrow: Int, value: C): DataFrame<*> =
@@ -341,30 +360,39 @@ public class DataFrameBuilder(private val header: List<String>) {
341360
)
342361
}
343362

363+
public fun fill(nrow: Int, dataFrame: AnyFrame): DataFrame<*> =
364+
withColumns { name ->
365+
DataColumn.createFrameColumn(
366+
name = name,
367+
groups = List(nrow) { dataFrame },
368+
schema = lazy { dataFrame.schema() },
369+
)
370+
}
371+
344372
public inline fun <reified C> nulls(nrow: Int): DataFrame<*> = fill<C?>(nrow, null)
345373

346374
public inline fun <reified C> fillIndexed(nrow: Int, crossinline init: (Int, String) -> C): DataFrame<*> =
347375
withColumns { name ->
348-
DataColumn.create(
349-
name,
350-
List(nrow) { init(it, name) },
376+
DataColumn.createWithTypeInference(
377+
name = name,
378+
values = List(nrow) { init(it, name) },
351379
)
352380
}
353381

354382
public inline fun <reified C> fill(nrow: Int, crossinline init: (Int) -> C): DataFrame<*> =
355383
withColumns { name ->
356-
DataColumn.create(
384+
DataColumn.createWithTypeInference(
357385
name = name,
358386
values = List(nrow, init),
359387
)
360388
}
361389

362-
private inline fun <reified C> fillNotNull(nrow: Int, crossinline init: (Int) -> C) =
390+
private inline fun <reified C> fillNotNull(nrow: Int, crossinline init: (Int) -> C & Any) =
363391
withColumns { name ->
364392
DataColumn.createValueColumn(
365393
name = name,
366394
values = List(nrow, init),
367-
type = typeOf<C>(),
395+
type = typeOf<C>().withNullability(false),
368396
)
369397
}
370398

0 commit comments

Comments
 (0)