From df0b5b137b46924a7731c5004f725a36551f4dca Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 6 Jun 2024 13:48:35 +0200 Subject: [PATCH 1/2] Fixed double describe() issue #724 by constraining `isComparable()` to check whether the values in the `Comparable` column are actually "comparable" aka, the generic type of Comparable is not */Nothing --- .../kotlinx/dataframe/api/DataColumnType.kt | 14 +++++- .../kotlinx/dataframe/impl/TypeUtils.kt | 7 ++- .../testSets/person/DataFrameTests.kt | 48 +++++++++++++++++++ .../kotlinx/dataframe/api/DataColumnType.kt | 14 +++++- .../kotlinx/dataframe/impl/TypeUtils.kt | 7 ++- .../testSets/person/DataFrameTests.kt | 48 +++++++++++++++++++ 6 files changed, 134 insertions(+), 4 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt index 6dd0478ee9..85d13c785e 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -2,10 +2,13 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.isNothing +import org.jetbrains.kotlinx.dataframe.impl.projectTo import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.typeClass import kotlin.reflect.KClass import kotlin.reflect.KType +import kotlin.reflect.KTypeProjection import kotlin.reflect.full.isSubclassOf import kotlin.reflect.full.isSubtypeOf import kotlin.reflect.typeOf @@ -26,7 +29,16 @@ public fun AnyCol.isNumber(): Boolean = isSubtypeOf() public fun AnyCol.isList(): Boolean = typeClass == List::class -public fun AnyCol.isComparable(): Boolean = isSubtypeOf?>() +/** + * Returns `true` if [this] column is comparable, i.e. its type is a subtype of [Comparable] and its + * type argument is not [Nothing]. + */ +public fun AnyCol.isComparable(): Boolean = + isSubtypeOf?>() && + type().projectTo(Comparable::class).arguments[0].let { + it != KTypeProjection.STAR && + it.type?.isNothing != true + } @PublishedApi internal fun AnyCol.isPrimitive(): Boolean = typeClass.isPrimitive() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 139bdb1772..bddaeb7610 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -12,7 +12,9 @@ import kotlin.reflect.KType import kotlin.reflect.KTypeParameter import kotlin.reflect.KTypeProjection import kotlin.reflect.KVariance -import kotlin.reflect.KVariance.* +import kotlin.reflect.KVariance.IN +import kotlin.reflect.KVariance.INVARIANT +import kotlin.reflect.KVariance.OUT import kotlin.reflect.KVisibility import kotlin.reflect.full.allSuperclasses import kotlin.reflect.full.createType @@ -463,6 +465,9 @@ internal fun guessValueType(values: Sequence, upperBound: KType? = null, l } } +internal val KType.isNothing: Boolean + get() = classifier == Nothing::class + internal fun nothingType(nullable: Boolean): KType = if (nullable) { typeOf>() diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index 59f3d6d025..2fc691c034 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -7,6 +7,7 @@ import io.kotest.matchers.shouldBe import io.kotest.matchers.shouldNotBe import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression @@ -79,6 +80,7 @@ import org.jetbrains.kotlinx.dataframe.api.intoColumns import org.jetbrains.kotlinx.dataframe.api.intoList import org.jetbrains.kotlinx.dataframe.api.intoRows import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isEmpty import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isNA @@ -119,6 +121,7 @@ import org.jetbrains.kotlinx.dataframe.api.rename import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName import org.jetbrains.kotlinx.dataframe.api.replace import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.select import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.api.sortBy @@ -165,18 +168,22 @@ import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.get import org.jetbrains.kotlinx.dataframe.hasNulls +import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl import org.jetbrains.kotlinx.dataframe.impl.between import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn import org.jetbrains.kotlinx.dataframe.impl.emptyPath import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl +import org.jetbrains.kotlinx.dataframe.impl.isNothing import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.impl.projectTo import org.jetbrains.kotlinx.dataframe.impl.trackColumnAccess import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.io.renderValueForStdout import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.math.mean +import org.jetbrains.kotlinx.dataframe.name import org.jetbrains.kotlinx.dataframe.ncol import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.size @@ -2358,6 +2365,47 @@ class DataFrameTests : BaseTest() { desc.print() } + @DataSchema + data class ComparableTest( + val int: Int, + val comparableInt: Comparable, + val string: String, + val comparableString: Comparable, + val comparableStar: Comparable<*>, + val comparableNothing: Comparable, + ) + + @Test + fun `is comparable`() { + val df = listOf( + ComparableTest(1, 1, "a", "a", 1, 1), + ComparableTest(2, 2, "b", "b", "2", "2"), + ).toDataFrame() + + df.int.isComparable() shouldBe true + df.comparableInt.isComparable() shouldBe true + df.string.isComparable() shouldBe true + df.comparableString.isComparable() shouldBe true + df.comparableStar.isComparable() shouldBe false + df.comparableNothing.isComparable() shouldBe false + } + + @Test + fun `describe twice minimal`() { + val df = dataFrameOf("a", "b")(1, "foo", 3, "bar") + val desc1 = df.describe() + val desc2 = desc1.describe() + desc2::class shouldBe DataFrameImpl::class + } + + @Test + fun `describe twice`() { + val df = typed.group { age and weight }.into("info").groupBy { city }.toDataFrame() + val desc1 = df.describe() + val desc2 = desc1.describe() + desc2::class shouldBe DataFrameImpl::class + } + @Test fun `index by column accessor`() { val col = listOf(1, 2, 3, 4, 5).toColumn("name") diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt index 6dd0478ee9..85d13c785e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -2,10 +2,13 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.isNothing +import org.jetbrains.kotlinx.dataframe.impl.projectTo import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.typeClass import kotlin.reflect.KClass import kotlin.reflect.KType +import kotlin.reflect.KTypeProjection import kotlin.reflect.full.isSubclassOf import kotlin.reflect.full.isSubtypeOf import kotlin.reflect.typeOf @@ -26,7 +29,16 @@ public fun AnyCol.isNumber(): Boolean = isSubtypeOf() public fun AnyCol.isList(): Boolean = typeClass == List::class -public fun AnyCol.isComparable(): Boolean = isSubtypeOf?>() +/** + * Returns `true` if [this] column is comparable, i.e. its type is a subtype of [Comparable] and its + * type argument is not [Nothing]. + */ +public fun AnyCol.isComparable(): Boolean = + isSubtypeOf?>() && + type().projectTo(Comparable::class).arguments[0].let { + it != KTypeProjection.STAR && + it.type?.isNothing != true + } @PublishedApi internal fun AnyCol.isPrimitive(): Boolean = typeClass.isPrimitive() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 139bdb1772..bddaeb7610 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -12,7 +12,9 @@ import kotlin.reflect.KType import kotlin.reflect.KTypeParameter import kotlin.reflect.KTypeProjection import kotlin.reflect.KVariance -import kotlin.reflect.KVariance.* +import kotlin.reflect.KVariance.IN +import kotlin.reflect.KVariance.INVARIANT +import kotlin.reflect.KVariance.OUT import kotlin.reflect.KVisibility import kotlin.reflect.full.allSuperclasses import kotlin.reflect.full.createType @@ -463,6 +465,9 @@ internal fun guessValueType(values: Sequence, upperBound: KType? = null, l } } +internal val KType.isNothing: Boolean + get() = classifier == Nothing::class + internal fun nothingType(nullable: Boolean): KType = if (nullable) { typeOf>() diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index 59f3d6d025..2fc691c034 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -7,6 +7,7 @@ import io.kotest.matchers.shouldBe import io.kotest.matchers.shouldNotBe import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowExpression @@ -79,6 +80,7 @@ import org.jetbrains.kotlinx.dataframe.api.intoColumns import org.jetbrains.kotlinx.dataframe.api.intoList import org.jetbrains.kotlinx.dataframe.api.intoRows import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isEmpty import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isNA @@ -119,6 +121,7 @@ import org.jetbrains.kotlinx.dataframe.api.rename import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName import org.jetbrains.kotlinx.dataframe.api.replace import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.schema import org.jetbrains.kotlinx.dataframe.api.select import org.jetbrains.kotlinx.dataframe.api.single import org.jetbrains.kotlinx.dataframe.api.sortBy @@ -165,18 +168,22 @@ import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException import org.jetbrains.kotlinx.dataframe.get import org.jetbrains.kotlinx.dataframe.hasNulls +import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl import org.jetbrains.kotlinx.dataframe.impl.between import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn import org.jetbrains.kotlinx.dataframe.impl.emptyPath import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl +import org.jetbrains.kotlinx.dataframe.impl.isNothing import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.impl.projectTo import org.jetbrains.kotlinx.dataframe.impl.trackColumnAccess import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.io.renderValueForStdout import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.math.mean +import org.jetbrains.kotlinx.dataframe.name import org.jetbrains.kotlinx.dataframe.ncol import org.jetbrains.kotlinx.dataframe.nrow import org.jetbrains.kotlinx.dataframe.size @@ -2358,6 +2365,47 @@ class DataFrameTests : BaseTest() { desc.print() } + @DataSchema + data class ComparableTest( + val int: Int, + val comparableInt: Comparable, + val string: String, + val comparableString: Comparable, + val comparableStar: Comparable<*>, + val comparableNothing: Comparable, + ) + + @Test + fun `is comparable`() { + val df = listOf( + ComparableTest(1, 1, "a", "a", 1, 1), + ComparableTest(2, 2, "b", "b", "2", "2"), + ).toDataFrame() + + df.int.isComparable() shouldBe true + df.comparableInt.isComparable() shouldBe true + df.string.isComparable() shouldBe true + df.comparableString.isComparable() shouldBe true + df.comparableStar.isComparable() shouldBe false + df.comparableNothing.isComparable() shouldBe false + } + + @Test + fun `describe twice minimal`() { + val df = dataFrameOf("a", "b")(1, "foo", 3, "bar") + val desc1 = df.describe() + val desc2 = desc1.describe() + desc2::class shouldBe DataFrameImpl::class + } + + @Test + fun `describe twice`() { + val df = typed.group { age and weight }.into("info").groupBy { city }.toDataFrame() + val desc1 = df.describe() + val desc2 = desc1.describe() + desc2::class shouldBe DataFrameImpl::class + } + @Test fun `index by column accessor`() { val col = listOf(1, 2, 3, 4, 5).toColumn("name") From 21c5c2bab2d85cb3b7ddf3f08ac4408e05a9c2d0 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 6 Jun 2024 13:48:52 +0200 Subject: [PATCH 2/2] linting --- .../jetbrains/kotlinx/dataframe/api/DataColumnType.kt | 10 ++++++++-- .../jetbrains/kotlinx/dataframe/api/DataColumnType.kt | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt index 85d13c785e..9a20ebd303 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -19,7 +19,8 @@ public fun AnyCol.isFrameColumn(): Boolean = kind() == ColumnKind.Frame public fun AnyCol.isValueColumn(): Boolean = kind() == ColumnKind.Value -public fun AnyCol.isSubtypeOf(type: KType): Boolean = this.type.isSubtypeOf(type) && (!this.type.isMarkedNullable || type.isMarkedNullable) +public fun AnyCol.isSubtypeOf(type: KType): Boolean = + this.type.isSubtypeOf(type) && (!this.type.isMarkedNullable || type.isMarkedNullable) public inline fun AnyCol.isSubtypeOf(): Boolean = isSubtypeOf(typeOf()) @@ -43,4 +44,9 @@ public fun AnyCol.isComparable(): Boolean = @PublishedApi internal fun AnyCol.isPrimitive(): Boolean = typeClass.isPrimitive() -internal fun KClass<*>.isPrimitive(): Boolean = isSubclassOf(Number::class) || this == String::class || this == Char::class || this == Array::class || isSubclassOf(Collection::class) +internal fun KClass<*>.isPrimitive(): Boolean = + isSubclassOf(Number::class) || + this == String::class || + this == Char::class || + this == Array::class || + isSubclassOf(Collection::class) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt index 85d13c785e..9a20ebd303 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -19,7 +19,8 @@ public fun AnyCol.isFrameColumn(): Boolean = kind() == ColumnKind.Frame public fun AnyCol.isValueColumn(): Boolean = kind() == ColumnKind.Value -public fun AnyCol.isSubtypeOf(type: KType): Boolean = this.type.isSubtypeOf(type) && (!this.type.isMarkedNullable || type.isMarkedNullable) +public fun AnyCol.isSubtypeOf(type: KType): Boolean = + this.type.isSubtypeOf(type) && (!this.type.isMarkedNullable || type.isMarkedNullable) public inline fun AnyCol.isSubtypeOf(): Boolean = isSubtypeOf(typeOf()) @@ -43,4 +44,9 @@ public fun AnyCol.isComparable(): Boolean = @PublishedApi internal fun AnyCol.isPrimitive(): Boolean = typeClass.isPrimitive() -internal fun KClass<*>.isPrimitive(): Boolean = isSubclassOf(Number::class) || this == String::class || this == Char::class || this == Array::class || isSubclassOf(Collection::class) +internal fun KClass<*>.isPrimitive(): Boolean = + isSubclassOf(Number::class) || + this == String::class || + this == Char::class || + this == Array::class || + isSubclassOf(Collection::class)