Skip to content

Commit 4554833

Browse files
authored
Merge pull request #1103 from Kotlin/sum
Sum statistics and aggregator improvements
2 parents 6f8b2bd + 7502a80 commit 4554833

File tree

14 files changed: +334 additions, -245 deletions

core/api/core.api

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1781,8 +1781,10 @@ public final class org/jetbrains/kotlinx/dataframe/api/DataColumnTypeKt {
17811781
public static final fun isComparable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
17821782
public static final fun isFrameColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
17831783
public static final fun isList (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
1784+
public static final fun isMixedNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
17841785
public static final fun isNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
17851786
public static final fun isPrimitiveNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
1787+
public static final fun isPrimitiveOrMixedNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
17861788
public static final fun isSubtypeOf (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/reflect/KType;)Z
17871789
public static final fun isValueColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
17881790
public static final fun valuesAreComparable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z
@@ -3816,6 +3818,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/StdKt {
38163818

38173819
public final class org/jetbrains/kotlinx/dataframe/api/SumKt {
38183820
public static final fun rowSum (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Number;
3821+
public static final fun rowSumOf (Lorg/jetbrains/kotlinx/dataframe/DataRow;Lkotlin/reflect/KType;)Ljava/lang/Number;
38193822
public static final fun sum (Lorg/jetbrains/kotlinx/dataframe/DataFrame;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
38203823
public static final fun sum (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Ljava/lang/Number;
38213824
public static final fun sum (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
@@ -3839,6 +3842,10 @@ public final class org/jetbrains/kotlinx/dataframe/api/SumKt {
38393842
public static synthetic fun sum$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
38403843
public static synthetic fun sum$default (Lorg/jetbrains/kotlinx/dataframe/api/Pivot;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
38413844
public static synthetic fun sum$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
3845+
public static final fun sumByte (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)I
3846+
public static final fun sumByte (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)I
3847+
public static final fun sumByte (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lkotlin/reflect/KProperty;)I
3848+
public static final fun sumByte (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)I
38423849
public static final fun sumFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
38433850
public static final fun sumFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
38443851
public static final fun sumFor (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lkotlin/reflect/KProperty;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
@@ -3863,8 +3870,15 @@ public final class org/jetbrains/kotlinx/dataframe/api/SumKt {
38633870
public static synthetic fun sumFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;[Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
38643871
public static synthetic fun sumFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;[Lkotlin/reflect/KProperty;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
38653872
public static synthetic fun sumFor$default (Lorg/jetbrains/kotlinx/dataframe/api/PivotGroupBy;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
3866-
public static final fun sumT (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/Number;
3867-
public static final fun sumTNullable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/Number;
3873+
public static final fun sumNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Ljava/lang/Number;
3874+
public static final fun sumOfByte (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)I
3875+
public static final fun sumOfByte (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)I
3876+
public static final fun sumOfShort (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)I
3877+
public static final fun sumOfShort (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)I
3878+
public static final fun sumShort (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)I
3879+
public static final fun sumShort (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Lkotlin/jvm/functions/Function2;)I
3880+
public static final fun sumShort (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lkotlin/reflect/KProperty;)I
3881+
public static final fun sumShort (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)I
38683882
}
38693883

38703884
public final class org/jetbrains/kotlinx/dataframe/api/TailKt {
@@ -5104,6 +5118,9 @@ public final class org/jetbrains/kotlinx/dataframe/impl/ExceptionUtilsKt {
51045118

51055119
public final class org/jetbrains/kotlinx/dataframe/impl/NumberTypeUtilsKt {
51065120
public static final fun getPrimitiveNumberTypes ()Ljava/util/Set;
5121+
public static final fun isMixedNumber (Lkotlin/reflect/KType;)Z
5122+
public static final fun isPrimitiveNumber (Lkotlin/reflect/KType;)Z
5123+
public static final fun isPrimitiveOrMixedNumber (Lkotlin/reflect/KType;)Z
51075124
}
51085125

51095126
public final class org/jetbrains/kotlinx/dataframe/impl/TypeUtilsKt {
@@ -6153,13 +6170,7 @@ public final class org/jetbrains/kotlinx/dataframe/math/StdKt {
61536170
}
61546171

61556172
public final class org/jetbrains/kotlinx/dataframe/math/SumKt {
6156-
public static final fun sum (Ljava/lang/Iterable;)Ljava/math/BigDecimal;
6157-
public static final fun sum (Ljava/lang/Iterable;)Ljava/math/BigInteger;
6158-
public static final fun sum (Ljava/lang/Iterable;Lkotlin/reflect/KType;)Ljava/lang/Number;
6159-
public static final fun sum (Lkotlin/sequences/Sequence;)Ljava/math/BigDecimal;
6160-
public static final fun sum (Lkotlin/sequences/Sequence;)Ljava/math/BigInteger;
6161-
public static final fun sumNullableT (Ljava/lang/Iterable;Lkotlin/reflect/KType;)Ljava/lang/Number;
6162-
public static final fun sumOf (Ljava/lang/Iterable;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Ljava/lang/Number;
6173+
public static final fun sumNullableT (Lkotlin/sequences/Sequence;Lkotlin/reflect/KType;)Ljava/lang/Number;
61636174
}
61646175

61656176
public abstract class org/jetbrains/kotlinx/dataframe/schema/ColumnSchema {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,9 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
77
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
88
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
99
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
10-
import org.jetbrains.kotlinx.dataframe.impl.primitiveNumberTypes
10+
import org.jetbrains.kotlinx.dataframe.impl.isMixedNumber
11+
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
12+
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveOrMixedNumber
1113
import org.jetbrains.kotlinx.dataframe.type
1214
import org.jetbrains.kotlinx.dataframe.typeClass
1315
import org.jetbrains.kotlinx.dataframe.util.IS_COMPARABLE
@@ -48,11 +50,23 @@ public inline fun <reified T> AnyCol.isType(): Boolean = type() == typeOf<T>()
4850
/** Returns `true` when this column's type is a subtype of `Number?` */
4951
public fun AnyCol.isNumber(): Boolean = isSubtypeOf<Number?>()
5052

53+
/** Returns `true` only when this column's type is exactly `Number` or `Number?`. */
54+
public fun AnyCol.isMixedNumber(): Boolean = type().isMixedNumber()
55+
5156
/**
5257
* Returns `true` when this column has the (nullable) type of either:
5358
* [Byte], [Short], [Int], [Long], [Float], or [Double].
5459
*/
55-
public fun AnyCol.isPrimitiveNumber(): Boolean = type().withNullability(false) in primitiveNumberTypes
60+
public fun AnyCol.isPrimitiveNumber(): Boolean = type().isPrimitiveNumber()
61+
62+
/**
63+
* Returns `true` when this column has the (nullable) type of either:
64+
* [Byte], [Short], [Int], [Long], [Float], [Double], or [Number].
65+
*
66+
* Careful: Will return `true` if the column contains multiple number types that
67+
* might NOT be primitive.
68+
*/
69+
public fun AnyCol.isPrimitiveOrMixedNumber(): Boolean = type().isPrimitiveOrMixedNumber()
5670

5771
public fun AnyCol.isList(): Boolean = typeClass == List::class
5872

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/mean.kt

Lines changed: 11 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -18,17 +18,15 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll
1818
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor
1919
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf
2020
import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfRow
21-
import org.jetbrains.kotlinx.dataframe.impl.aggregation.primitiveNumberColumns
21+
import org.jetbrains.kotlinx.dataframe.impl.aggregation.primitiveOrMixedNumberColumns
2222
import org.jetbrains.kotlinx.dataframe.impl.columns.toNumberColumns
23-
import org.jetbrains.kotlinx.dataframe.impl.primitiveNumberTypes
23+
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveOrMixedNumber
2424
import kotlin.reflect.KProperty
25-
import kotlin.reflect.full.withNullability
2625
import kotlin.reflect.typeOf
2726

28-
/*
29-
* TODO KDocs:
27+
/* TODO KDocs:
3028
* Calculating the mean is supported for all primitive number types.
31-
* Nulls are filtered from columns.
29+
* Nulls are filtered out.
3230
* The return type is always Double, Double.NaN for empty input, never null.
3331
* (May introduce loss of precision for Longs).
3432
* For mixed primitive number types, [TwoStepNumbersAggregator] unifies the numbers before calculating the mean.
@@ -48,24 +46,21 @@ public inline fun <T, reified R : Number> DataColumn<T>.meanOf(
4846
// region DataRow
4947

5048
public fun AnyRow.rowMean(skipNA: Boolean = skipNA_default): Double =
51-
Aggregators.mean(skipNA).aggregateOfRow(this) {
52-
colsOf<Number?> { it.isPrimitiveNumber() }
53-
}
49+
Aggregators.mean(skipNA).aggregateOfRow(this, primitiveOrMixedNumberColumns())
5450

5551
public inline fun <reified T : Number?> AnyRow.rowMeanOf(skipNA: Boolean = skipNA_default): Double {
56-
require(typeOf<T>().withNullability(false) in primitiveNumberTypes) {
52+
require(typeOf<T>().isPrimitiveOrMixedNumber()) {
5753
"Type ${T::class.simpleName} is not a primitive number type. Mean only supports primitive number types."
5854
}
59-
return Aggregators.mean(skipNA)
60-
.aggregateOfRow(this) { colsOf<T>() }
55+
return Aggregators.mean(skipNA).aggregateOfRow(this) { colsOf<T>() }
6156
}
6257

6358
// endregion
6459

6560
// region DataFrame
6661

6762
public fun <T> DataFrame<T>.mean(skipNA: Boolean = skipNA_default): DataRow<T> =
68-
meanFor(skipNA, primitiveNumberColumns())
63+
meanFor(skipNA, primitiveOrMixedNumberColumns())
6964

7065
public fun <T, C : Number> DataFrame<T>.meanFor(
7166
skipNA: Boolean = skipNA_default,
@@ -116,7 +111,7 @@ public inline fun <T, reified D : Number> DataFrame<T>.meanOf(
116111
@Refine
117112
@Interpretable("GroupByMean1")
118113
public fun <T> Grouped<T>.mean(skipNA: Boolean = skipNA_default): DataFrame<T> =
119-
meanFor(skipNA, primitiveNumberColumns())
114+
meanFor(skipNA, primitiveOrMixedNumberColumns())
120115

121116
@Refine
122117
@Interpretable("GroupByMean0")
@@ -181,7 +176,7 @@ public inline fun <T, reified R : Number> Grouped<T>.meanOf(
181176
// region Pivot
182177

183178
public fun <T> Pivot<T>.mean(skipNA: Boolean = skipNA_default, separate: Boolean = false): DataRow<T> =
184-
meanFor(skipNA, separate, primitiveNumberColumns())
179+
meanFor(skipNA, separate, primitiveOrMixedNumberColumns())
185180

186181
public fun <T, C : Number> Pivot<T>.meanFor(
187182
skipNA: Boolean = skipNA_default,
@@ -224,7 +219,7 @@ public inline fun <T, reified R : Number> Pivot<T>.meanOf(
224219
// region PivotGroupBy
225220

226221
public fun <T> PivotGroupBy<T>.mean(separate: Boolean = false, skipNA: Boolean = skipNA_default): DataFrame<T> =
227-
meanFor(skipNA, separate, primitiveNumberColumns())
222+
meanFor(skipNA, separate, primitiveOrMixedNumberColumns())
228223

229224
public fun <T, C : Number> PivotGroupBy<T>.meanFor(
230225
skipNA: Boolean = skipNA_default,

0 commit comments

Comments
 (0)