From 68c99c54786e96a4d12fd84fb6735cd4bd4354ea Mon Sep 17 00:00:00 2001 From: Kopilov Aleksandr Date: Wed, 27 Sep 2023 14:58:42 +0300 Subject: [PATCH 1/4] Read arrow NullVector --- .../org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt index d7d67b6a87..62f50c661c 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt @@ -13,6 +13,7 @@ import org.apache.arrow.vector.Float8Vector import org.apache.arrow.vector.IntVector import org.apache.arrow.vector.LargeVarBinaryVector import org.apache.arrow.vector.LargeVarCharVector +import org.apache.arrow.vector.NullVector import org.apache.arrow.vector.SmallIntVector import org.apache.arrow.vector.TimeMicroVector import org.apache.arrow.vector.TimeMilliVector @@ -172,6 +173,10 @@ private fun StructVector.values(range: IntRange): List?> = ran getObject(it) } +private fun NullVector.values(range: IntRange): List = range.map { + getObject(it) +} + private fun VarCharVector.values(range: IntRange): List = range.map { if (isNull(it)) { null @@ -245,6 +250,7 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi is TimeStampMilliVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is StructVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) + is NullVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) else -> { throw NotImplementedError("reading from ${vector.javaClass.canonicalName} is not implemented") } From 0b0c90f757b31a01da2538e999c5e04e2ee4f51c Mon Sep 17 00:00:00 2001 From: Kopilov Aleksandr Date: Sat, 6 Jan 2024 13:43:30 +0300 Subject: [PATCH 2/4] Read arrow NullVector test --- .../io/exampleEstimatesAssertions.kt | 7 +++++++ .../test/resources/test-illegal.arrow.feather | Bin 52738 -> 52858 bytes .../src/test/resources/test-illegal.arrow.ipc | Bin 51528 -> 51600 bytes .../resources/test-with-nulls.arrow.feather | Bin 52738 -> 52858 bytes .../test/resources/test-with-nulls.arrow.ipc | Bin 51528 -> 51600 bytes 5 files changed, 7 insertions(+) diff --git a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt index 3366e3055b..f7f438add3 100644 --- a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt +++ b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt @@ -158,4 +158,11 @@ internal fun assertEstimations(exampleFrame: AnyFrame, expectedNullable: Boolean timeNanoCol.forEachIndexed { i, element -> assertValueOrNull(iBatch(i), element, LocalTime.ofNanoOfDay(iBatch(i).toLong())) } + + exampleFrame.getColumnOrNull("nulls")?.let { nullCol -> + assert(hasNulls) + nullCol.values().forEach { + assert(it == null) + } + } } diff --git a/dataframe-arrow/src/test/resources/test-illegal.arrow.feather b/dataframe-arrow/src/test/resources/test-illegal.arrow.feather index eddaf458ca110bad379e244e9334de29230426ef..e1e7076e8d7359e8f80940483fa4db8a52f69f4e 100644 GIT binary patch delta 456 zcmZpg!~AOwGp}P%kbk%#1H*qHm@tu7zh0Dqfx&=bvK0qu2#5Wij7?uEW2@tygu>=r50GhD|h%-QD0Wr`x29rPk{{tyT0U#j(6$4TX ztU#Psnv+uuq!s@BXZ*i$!V_jzkaHq7*RrrPGm36*Wp`%eOaUndf|Sh%IjZd#BR1Q* zgC$+PfRZO5attRnH+ny>tq%cm9e@~QFVKk$3qTAYNPyA>b`XAnJA{UbAE<@!kx32) b4j`8WN-{7I5k9fB3?Oj$aNPj<1QK-s$b?Rj delta 324 zcmex0hq-AEGp}P%kbk%#1H*qH$e75hA0ooQz`(-7z;J|_fnf>|X8^Gd5dUCeU^oH9 zQ-C-Eh!ueN1tSB)8X&F!VhxiFv|gX4d~eFkvFAe!VCI1A_qz1H%Vq28IJbJOzj&fLH~Hzc2yCfVc&SeSla5 zh;J}5Ff0M$5+HT~VhJFA05oF_5NCkQ0%D+X3?_g6{|8cx0zg6nDh8w&Sb;dNG$*GR zNGtsL&-j01#}kNiA~u(@uro7?Zmwl_W}G~Mor5uDa^p1N&1X0=>=+|9o4SJ~ZM}e! S0bU%8CxDU~n-_X7uLS@T6Em*> delta 150 zcmbO*nfb&dX4d~ekTH=}KSYFqfq{jEf#C=<1H%*`&H!Q^ApXI`z;FVHrvPyT5Gw%j z3q}TpH9%Ye#2!E_0mN^B`nLda0}$H)G0-T6ja!~Da{_4wkS!jY=d!RfGm30p%I?g# Y`3Fad9izwQTz9ZgsTWY_g!hSB0QI#bWdHyG diff --git a/dataframe-arrow/src/test/resources/test-with-nulls.arrow.feather b/dataframe-arrow/src/test/resources/test-with-nulls.arrow.feather index 129128f9f61b977b014425eed0787866b6c0b9fe..1f00f225921f516e76e0c5c7abe99d2e21627ad7 100644 GIT binary patch delta 456 zcmZpg!~AOwGp}P%kbk%#1H*qHm@tu7zh0Dqfx&=bvK0qu2#5Wij7?uEW2@tygu>=r50GhD|h%-QD0Wr`x29rPk|1&T!G713M5>PQ9 z#lQ;0d8Iiy#Xwr&&ws}M8z($rW(7GXVskAEJ2Ru`=2muRM$QzFVjxJ_e2}Bsjxl1h ztvguK)e9(j0wTw7VsoSS^V<3lAlCtiLG}Wj$glv!0D=T4U0?^{7q~-cnD~KO2p^f` dVBi39S)e2X0}|X8^Gd5dUCeU^oH9 zQ-C-Eh!ueN1tSB)8X&F!VhxiFv|gX4d~eFkvFAe!VCI1A_qz1H%Vq28IJbJOzj&fLH~Hzc2yCfVc&SeSla5 zh;J}5Ff0M$5+HT~VhJFA05oF_5NCkQ0%D+X3?_g6|7T!eWE23hC7@zJih&h~^Gb7a zih;DkpZ|>iH+DRMI45FrDGNI@qv+;Zc4x-P6WBQzQzkb~6W)A=Bg2j{Vza3`Skl%D TC>h|z!FU2Fsj+#X_wrf*6L>SQ delta 150 zcmbO*nfb&dX4d~ekTH=}KSYFqfq{jEf#C=<1H%*`&H!Q^ApXI`z;FVHrvPyT5Gw%j z3q}TpH9%Ye#2!E_0mN^B`nLda0}$H)G0-T6ja!~Da{_4wkS!jY=d!RfGm30p%I?g# Y`3Fad9izwQTz9ZgsTWY_g!hSB0QI#bWdHyG From 99d8e8870bac792119fc7a71decdda4d25f00676 Mon Sep 17 00:00:00 2001 From: Kopilov Aleksandr Date: Wed, 21 Feb 2024 16:22:59 +0300 Subject: [PATCH 3/4] Read NullVector as Column --- .../kotlinx/dataframe/io/arrowReadingImpl.kt | 12 +++++++++--- .../dataframe/io/exampleEstimatesAssertions.kt | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt index 62f50c661c..b126cd9247 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt @@ -173,8 +173,8 @@ private fun StructVector.values(range: IntRange): List?> = ran getObject(it) } -private fun NullVector.values(range: IntRange): List = range.map { - getObject(it) +private fun NullVector.values(range: IntRange): List = range.map { + getObject(it) as Nothing? } private fun VarCharVector.values(range: IntRange): List = range.map { @@ -209,6 +209,12 @@ private fun LargeVarCharVector.values(range: IntRange): List = range.ma } } +internal fun nothingType(nullable: Boolean): KType = if (nullable) { + typeOf>() +} else { + typeOf>() +}.arguments.first().type!! + private inline fun List.withTypeNullable( expectedNulls: Boolean, nullabilityOptions: NullabilityOptions, @@ -250,7 +256,7 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi is TimeStampMilliVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is StructVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) - is NullVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) + is NullVector -> vector.values(range) to nothingType(field.isNullable) else -> { throw NotImplementedError("reading from ${vector.javaClass.canonicalName} is not implemented") } diff --git a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt index f7f438add3..66a2713518 100644 --- a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt +++ b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/exampleEstimatesAssertions.kt @@ -160,6 +160,7 @@ internal fun assertEstimations(exampleFrame: AnyFrame, expectedNullable: Boolean } exampleFrame.getColumnOrNull("nulls")?.let { nullCol -> + nullCol.type() shouldBe nothingType(hasNulls) assert(hasNulls) nullCol.values().forEach { assert(it == null) From 856c4a539481c030ec00fcab0f9702d88e33980c Mon Sep 17 00:00:00 2001 From: Kopilov Aleksandr Date: Thu, 22 Feb 2024 21:39:40 +0300 Subject: [PATCH 4/4] withTypeNullableNothingList --- .../kotlinx/dataframe/io/arrowReadingImpl.kt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt index b126cd9247..2ae863c2b2 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt @@ -223,6 +223,15 @@ private inline fun List.withTypeNullable( return this to typeOf().withNullability(nullable) } +@JvmName("withTypeNullableNothingList") +private fun List.withTypeNullable( + expectedNulls: Boolean, + nullabilityOptions: NullabilityOptions, +): Pair, KType> { + val nullable = nullabilityOptions.applyNullability(this, expectedNulls) + return this to nothingType(nullable) +} + private fun readField(root: VectorSchemaRoot, field: Field, nullability: NullabilityOptions): AnyBaseCol { try { val range = 0 until root.rowCount @@ -256,7 +265,7 @@ private fun readField(root: VectorSchemaRoot, field: Field, nullability: Nullabi is TimeStampMilliVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is TimeStampSecVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) is StructVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) - is NullVector -> vector.values(range) to nothingType(field.isNullable) + is NullVector -> vector.values(range).withTypeNullable(field.isNullable, nullability) else -> { throw NotImplementedError("reading from ${vector.javaClass.canonicalName} is not implemented") }