@@ -20,11 +20,14 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
20
20
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
21
21
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl
22
22
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
23
- import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType
23
+ import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
24
24
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
25
25
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
26
26
import org.jetbrains.kotlinx.dataframe.impl.splitByIndices
27
27
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
28
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN
29
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_IMPORT
30
+ import org.jetbrains.kotlinx.dataframe.util.CREATE_FRAME_COLUMN_REPLACE
28
31
import kotlin.reflect.KClass
29
32
import kotlin.reflect.KProperty
30
33
import kotlin.reflect.KType
@@ -45,6 +48,9 @@ public interface DataColumn<out T> : BaseColumn<T> {
45
48
/**
46
49
* Creates [ValueColumn] using given [name], [values] and [type].
47
50
*
51
+ * Be careful; values are NOT checked to adhere to [type] for efficiency,
52
+ * unless you specify [infer].
53
+ *
48
54
* @param name name of the column
49
55
* @param values list of column values
50
56
* @param type type of the column
@@ -56,11 +62,20 @@ public interface DataColumn<out T> : BaseColumn<T> {
56
62
type : KType ,
57
63
infer : Infer = Infer .None ,
58
64
defaultValue : T ? = null,
59
- ): ValueColumn <T > = ValueColumnImpl (values, name, getValuesType(values, type, infer), defaultValue)
65
+ ): ValueColumn <T > =
66
+ ValueColumnImpl (
67
+ values = values,
68
+ name = name,
69
+ type = getValuesType(values, type, infer),
70
+ defaultValue = defaultValue,
71
+ )
60
72
61
73
/**
62
74
* Creates [ValueColumn] using given [name], [values] and reified column [type].
63
75
*
76
+ * Be careful; values are NOT checked to adhere to [type] for efficiency,
77
+ * unless you specify [infer].
78
+ *
64
79
* Note, that column [type] will be defined at compile-time using [T] argument
65
80
*
66
81
* @param T type of the column
@@ -74,33 +89,92 @@ public interface DataColumn<out T> : BaseColumn<T> {
74
89
infer : Infer = Infer .None ,
75
90
): ValueColumn <T > =
76
91
createValueColumn(
77
- name,
78
- values,
79
- getValuesType(
80
- values,
81
- typeOf<T >(),
82
- infer,
83
- ),
92
+ name = name,
93
+ values = values,
94
+ type = typeOf<T >(),
95
+ infer = infer,
84
96
)
85
97
98
/**
 * Builds a [ColumnGroup] called [name] out of the columns held by [df].
 *
 * @param name name to give the resulting column group
 * @param df dataframe whose columns make up the group
 * @return the new [ColumnGroup], backed by [ColumnGroupImpl]
 */
public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> {
    return ColumnGroupImpl(name, df)
}
87
105
106
/**
 * Creates a [FrameColumn] named [name] by splitting [df] into several dataframes
 * according to [startIndices].
 *
 * The schema of the resulting column is taken lazily from [df] itself.
 *
 * @param name name of the frame column
 * @param df dataframe that is split up to form the column's values
 * @param startIndices indices handed to `splitByIndices`
 *   (presumably the first row index of each resulting dataframe; confirm against `splitByIndices`)
 */
@Deprecated(
    message = CREATE_FRAME_COLUMN,
    replaceWith = ReplaceWith(CREATE_FRAME_COLUMN_REPLACE, CREATE_FRAME_COLUMN_IMPORT),
    level = DeprecationLevel.WARNING,
)
public fun <T> createFrameColumn(name: String, df: DataFrame<T>, startIndices: Iterable<Int>): FrameColumn<T> {
    // Materialize the split eagerly; only the schema computation is deferred.
    val groups = df.splitByIndices(startIndices.asSequence()).toList()
    val schema = lazy { df.schema() }
    return FrameColumnImpl(name, groups, schema)
}
90
113
114
/**
 * Wraps an existing list of dataframes, [groups], into a [FrameColumn] named [name].
 *
 * Be careful; nothing is validated here for efficiency: neither that every element of
 * [groups] really is a non-null [DataFrame], nor that a supplied [schema] is valid for them.
 *
 * @param name name of the frame column
 * @param groups the dataframes to be stored in the column
 * @param schema an optional, lazily calculated [DataFrameSchema] representing
 *   the intersecting schema of [groups]; `null` by default
 */
public fun <T> createFrameColumn(
    name: String,
    groups: List<DataFrame<T>>,
    schema: Lazy<DataFrameSchema>? = null,
): FrameColumn<T> {
    return FrameColumnImpl(name, groups, schema)
}
96
130
131
/**
 * Inspects every element of [values] and builds the column kind that fits them:
 * a [FrameColumn], a [ColumnGroup], or a [ValueColumn].
 * This is safer, but less efficient, than the other creation functions.
 *
 * Some conversions are applied automatically in an attempt to unify the values, such as:
 * - `null` -> [DataFrame.empty][DataFrame.empty]`()` and [DataRow] -> single-row [DataFrame]
 *   when there are other [DataFrames][DataFrame] present in [values]
 * - [List][List]`<`[DataRow][DataRow]`<*>>` -> [DataFrame]
 *
 * @param name name of the column
 * @param values the values to represent each row in the column
 * @param nullable optionally states whether [values] contains nulls; if `null`, this is inferred
 * @param allColsMakesColGroup if `true` and all values are non-null, same-sized columns,
 *   a column group is created instead of a [DataColumn][DataColumn]`<`[AnyCol][AnyCol]`>`
 */
public fun <T> createWithTypeInference(
    name: String,
    values: List<T>,
    nullable: Boolean? = null,
    allColsMakesColGroup: Boolean = false,
): DataColumn<T> {
    // All of the analysis lives in createColumnGuessingType; this is only the public entry point.
    return createColumnGuessingType(
        name = name,
        values = values,
        nullable = nullable,
        allColsMakesColGroup = allColsMakesColGroup,
    )
}
102
160
103
- public fun <T > create (
161
+ /**
162
+ * Calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on
163
+ * [type].
164
+ *
165
+ * Be careful; values in [values] are NOT checked to adhere to the given [type], nor
166
+ * do we check whether there are nulls among the values when the given type is [DataFrame]
167
+ * (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
168
+ * When [type] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
169
+ *
170
+ * This may be unsafe but is more efficient than [createWithTypeInference].
171
+ *
172
+ * @param name the name of the column
173
+ * @param values the values to represent each row in the column
174
+ * @param type the (unchecked) common type of [values]
175
+ * @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
176
+ */
177
+ public fun <T > createUnsafe (
104
178
name : String ,
105
179
values : List <T >,
106
180
type : KType ,
@@ -112,9 +186,29 @@ public interface DataColumn<out T> : BaseColumn<T> {
112
186
ColumnKind .Frame -> createFrameColumn(name, values as List <AnyFrame >).asDataColumn().cast()
113
187
}
114
188
115
- public inline fun <reified T > create (name : String , values : List <T >, infer : Infer = Infer .None ): DataColumn <T > =
116
- create(name, values, typeOf<T >(), infer)
189
/**
 * Resolves the reified type [T] and forwards to the `KType`-based [createUnsafe] overload,
 * which calls [createColumnGroup], [createFrameColumn], or [createValueColumn] based on that type.
 *
 * Be careful; values in [values] are NOT checked to adhere to type [T], nor
 * do we check whether there are nulls among the values when [T] is [DataFrame]
 * (a [FrameColumn] cannot contain `null`, this causes runtime exceptions).
 * When [T] is `DataFrame<*>?`, a [ValueColumn] is created to avoid this issue.
 *
 * This may be unsafe but is more efficient than [createWithTypeInference].
 *
 * @param T the (unchecked) common type of [values]
 * @param name the name of the column
 * @param values the values to represent each row in the column
 * @param infer in case a [ValueColumn] is created, this controls how/whether types need to be inferred
 */
public inline fun <reified T> createUnsafe(
    name: String,
    values: List<T>,
    infer: Infer = Infer.None,
): DataColumn<T> {
    val type = typeOf<T>()
    return createUnsafe(name, values, type, infer)
}
117
210
211
/**
 * Creates an empty [DataColumn] with the given [name].
 *
 * The result is a value column of type [Unit] that holds no values.
 *
 * @param name name of the column; an empty string by default
 */
public fun empty(name: String = ""): AnyCol {
    val noValues = emptyList<Unit>()
    return createValueColumn(name, noValues, typeOf<Unit>())
}
119
213
}
120
214
0 commit comments