diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt index d7d67b6a87..2ae863c2b2 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt @@ -13,6 +13,7 @@ import org.apache.arrow.vector.Float8Vector import org.apache.arrow.vector.IntVector import org.apache.arrow.vector.LargeVarBinaryVector import org.apache.arrow.vector.LargeVarCharVector +import org.apache.arrow.vector.NullVector import org.apache.arrow.vector.SmallIntVector import org.apache.arrow.vector.TimeMicroVector import org.apache.arrow.vector.TimeMilliVector @@ -172,6 +173,10 @@ private fun StructVector.values(range: IntRange): List?> = ran getObject(it) } +private fun NullVector.values(range: IntRange): List = range.map { + getObject(it) as Nothing? +} + private fun VarCharVector.values(range: IntRange): List = range.map { if (isNull(it)) { null @@ -204,6 +209,12 @@ private fun LargeVarCharVector.values(range: IntRange): List = range.ma } } +internal fun nothingType(nullable: Boolean): KType = if (nullable) { + typeOf>() +} else { + typeOf>() +}.arguments.first().type!! + private inline fun List.withTypeNullable( expectedNulls: Boolean, nullabilityOptions: NullabilityOptions, @@ -212,6 +223,15 @@ private inline fun List.withTypeNullable( return this to typeOf().withNullability(nullable) } +@JvmName("withTypeNullableNothingList") +private fun List.withTypeNullable( + expectedNulls: Boolean, + nullabilityOptions: NullabilityOptions, +): Pair, KType> { + val nullable = nullabilityOptions.applyNullability(this, expectedNulls) + return this to nothingType(nullable) +} + private fun readField(root: VectorSchemaRoot, field: Field, nullability: NullabilityOptions): AnyBaseCol { try { val range = 0 until root.rowCount @@ -245,6 +265,7 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi is TimeStampMilliVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is StructVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) + is NullVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) else -> { throw NotImplementedError("reading from ${vector.javaClass.canonicalName} is not implemented") } diff --git a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt index 3366e3055b..66a2713518 100644 --- a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt +++ b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt @@ -158,4 +158,12 @@ internal fun assertEstimations(exampleFrame: AnyFrame, expectedNullable: Boolean timeNanoCol.forEachIndexed { i, element -> assertValueOrNull(iBatch(i), element, LocalTime.ofNanoOfDay(iBatch(i).toLong())) } + + exampleFrame.getColumnOrNull("nulls")?.let { nullCol -> + nullCol.type() shouldBe nothingType(hasNulls) + assert(hasNulls) + nullCol.values().forEach { + assert(it == null) + } + } } diff --git a/dataframe-arrow/src/test/resources/test-illegal.arrow.feather b/dataframe-arrow/src/test/resources/test-illegal.arrow.feather index eddaf458ca..e1e7076e8d 100644 Binary files a/dataframe-arrow/src/test/resources/test-illegal.arrow.feather and b/dataframe-arrow/src/test/resources/test-illegal.arrow.feather differ diff --git a/dataframe-arrow/src/test/resources/test-illegal.arrow.ipc b/dataframe-arrow/src/test/resources/test-illegal.arrow.ipc index de09051e67..7c41ff909f 100644 Binary files a/dataframe-arrow/src/test/resources/test-illegal.arrow.ipc and b/dataframe-arrow/src/test/resources/test-illegal.arrow.ipc differ diff --git a/dataframe-arrow/src/test/resources/test-with-nulls.arrow.feather b/dataframe-arrow/src/test/resources/test-with-nulls.arrow.feather index 129128f9f6..1f00f22592 100644 Binary files a/dataframe-arrow/src/test/resources/test-with-nulls.arrow.feather and b/dataframe-arrow/src/test/resources/test-with-nulls.arrow.feather differ diff --git a/dataframe-arrow/src/test/resources/test-with-nulls.arrow.ipc b/dataframe-arrow/src/test/resources/test-with-nulls.arrow.ipc index 0db25e66fc..0453744e46 100644 Binary files a/dataframe-arrow/src/test/resources/test-with-nulls.arrow.ipc and b/dataframe-arrow/src/test/resources/test-with-nulls.arrow.ipc differ