Skip to content

Commit fc52b8a

Browse files
authored
Merge 63d5e0a into bb55f83
2 parents bb55f83 + 63d5e0a commit fc52b8a

File tree

14 files changed

+2223
-29
lines changed

14 files changed

+2223
-29
lines changed

core/build.gradle.kts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ dependencies {
8282
}
8383
testImplementation(libs.kotlin.scriptingJvm)
8484
testImplementation(libs.jsoup)
85+
86+
// testImplementation("org.openjdk.jol:jol-core:0.10")
87+
implementation("org.openjdk.jol:jol-core:0.10")
88+
implementation("it.unimi.dsi:fastutil:8.5.14")
8589
}
8690

8791
val samplesImplementation by configurations.getting {
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package org.jetbrains.kotlinx.dataframe
2+
3+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnDataHolderImpl
4+
import kotlin.reflect.KType
5+
import kotlin.reflect.typeOf
6+
7+
/**
 * Iterable container for the values of a column.
 *
 * Besides iteration it offers indexed access, range access, membership tests and
 * conversions to [List] and [Set].
 */
public interface ColumnDataHolder<T> : Iterable<T> {

    /** Number of values held. */
    public val size: Int

    /** Lazily provided set of the distinct values in this holder. */
    public val distinct: Lazy<Set<T>>

    /** Returns the value stored at [index]. */
    public operator fun get(index: Int): T

    /** Returns the values stored at the indices covered by [range]. */
    public operator fun get(range: IntRange): List<T>

    /** Returns `true` when [value] is one of the held values. */
    public fun contains(value: T): Boolean

    /** Returns the held values as a [List]. */
    public fun toList(): List<T>

    /** Returns the held values as a [Set]. */
    public fun toSet(): Set<T>

    /** Intentionally empty companion object. */
    public companion object
}
25+
26+
/**
 * Creates a [ColumnDataHolder] from this collection by delegating to [ColumnDataHolderImpl.of].
 *
 * @param type the [KType] of the elements; forwarded unchanged.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Collection<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolderImpl.of(this, type, distinct)
}

/**
 * Creates a [ColumnDataHolder] from this collection, using the reified [T] as the element type.
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Collection<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
31+
32+
/**
 * Creates a [ColumnDataHolder] from this array by delegating to [ColumnDataHolderImpl.of].
 *
 * @param type the [KType] of the elements; forwarded unchanged.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Array<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolderImpl.of(this, type, distinct)
}

/**
 * Creates a [ColumnDataHolder] from this array, using the reified [T] as the element type.
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Array<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
37+
38+
// Each function below hands the primitive array, together with the matching element type,
// to ColumnDataHolderImpl.of; `distinct` is forwarded unchanged.

/** Creates a [ColumnDataHolder] of [Boolean] from this [BooleanArray]. */
public fun BooleanArray.asColumnDataHolder(distinct: Lazy<Set<Boolean>>? = null): ColumnDataHolder<Boolean> {
    return ColumnDataHolderImpl.of(this, typeOf<Boolean>(), distinct)
}

/** Creates a [ColumnDataHolder] of [Byte] from this [ByteArray]. */
public fun ByteArray.asColumnDataHolder(distinct: Lazy<Set<Byte>>? = null): ColumnDataHolder<Byte> {
    return ColumnDataHolderImpl.of(this, typeOf<Byte>(), distinct)
}

/** Creates a [ColumnDataHolder] of [Short] from this [ShortArray]. */
public fun ShortArray.asColumnDataHolder(distinct: Lazy<Set<Short>>? = null): ColumnDataHolder<Short> {
    return ColumnDataHolderImpl.of(this, typeOf<Short>(), distinct)
}

/** Creates a [ColumnDataHolder] of [Int] from this [IntArray]. */
public fun IntArray.asColumnDataHolder(distinct: Lazy<Set<Int>>? = null): ColumnDataHolder<Int> {
    return ColumnDataHolderImpl.of(this, typeOf<Int>(), distinct)
}

/** Creates a [ColumnDataHolder] of [Long] from this [LongArray]. */
public fun LongArray.asColumnDataHolder(distinct: Lazy<Set<Long>>? = null): ColumnDataHolder<Long> {
    return ColumnDataHolderImpl.of(this, typeOf<Long>(), distinct)
}

/** Creates a [ColumnDataHolder] of [Float] from this [FloatArray]. */
public fun FloatArray.asColumnDataHolder(distinct: Lazy<Set<Float>>? = null): ColumnDataHolder<Float> {
    return ColumnDataHolderImpl.of(this, typeOf<Float>(), distinct)
}

/** Creates a [ColumnDataHolder] of [Double] from this [DoubleArray]. */
public fun DoubleArray.asColumnDataHolder(distinct: Lazy<Set<Double>>? = null): ColumnDataHolder<Double> {
    return ColumnDataHolderImpl.of(this, typeOf<Double>(), distinct)
}

/** Creates a [ColumnDataHolder] of [Char] from this [CharArray]. */
public fun CharArray.asColumnDataHolder(distinct: Lazy<Set<Char>>? = null): ColumnDataHolder<Char> {
    return ColumnDataHolderImpl.of(this, typeOf<Char>(), distinct)
}
61+
62+
// Unsigned arrays are still marked @ExperimentalUnsignedTypes in the Kotlin standard library.
// This file has no file-level opt-in, so each function opts in explicitly; the sibling
// implementation file does the same at file level.

/**
 * Creates a [ColumnDataHolder] of [UByte] by handing this [UByteArray] to [ColumnDataHolderImpl.of].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UByteArray.asColumnDataHolder(distinct: Lazy<Set<UByte>>? = null): ColumnDataHolder<UByte> =
    ColumnDataHolderImpl.of(this, typeOf<UByte>(), distinct)

/**
 * Creates a [ColumnDataHolder] of [UShort] by handing this [UShortArray] to [ColumnDataHolderImpl.of].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UShortArray.asColumnDataHolder(distinct: Lazy<Set<UShort>>? = null): ColumnDataHolder<UShort> =
    ColumnDataHolderImpl.of(this, typeOf<UShort>(), distinct)

/**
 * Creates a [ColumnDataHolder] of [UInt] by handing this [UIntArray] to [ColumnDataHolderImpl.of].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun UIntArray.asColumnDataHolder(distinct: Lazy<Set<UInt>>? = null): ColumnDataHolder<UInt> =
    ColumnDataHolderImpl.of(this, typeOf<UInt>(), distinct)

/**
 * Creates a [ColumnDataHolder] of [ULong] by handing this [ULongArray] to [ColumnDataHolderImpl.of].
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
@OptIn(ExperimentalUnsignedTypes::class)
public fun ULongArray.asColumnDataHolder(distinct: Lazy<Set<ULong>>? = null): ColumnDataHolder<ULong> =
    ColumnDataHolderImpl.of(this, typeOf<ULong>(), distinct)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
@file:OptIn(ExperimentalUnsignedTypes::class)
2+
3+
package org.jetbrains.kotlinx.dataframe.impl.columns
4+
5+
import org.jetbrains.kotlinx.dataframe.ColumnDataHolder
6+
import org.jetbrains.kotlinx.dataframe.impl.asList
7+
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveArray
8+
import kotlin.reflect.KType
9+
import kotlin.reflect.typeOf
10+
11+
/**
 * [ColumnDataHolder] implementation that delegates every operation to a backing [List].
 *
 * Instances are created through the `of` factory functions of the companion object, which choose
 * the backing list based on the supplied [KType].
 *
 * @param list the backing list that all read operations delegate to.
 * @param distinct optional precomputed set of distinct values. When `null`, the set is computed
 *   from [list] on first access.
 */
internal class ColumnDataHolderImpl<T> private constructor(
    private val list: List<T>,
    distinct: Lazy<Set<T>>?,
) : ColumnDataHolder<T> {

    /** The supplied distinct set, or a lazily computed `list.toSet()` when none was supplied. */
    override val distinct = distinct ?: lazy { list.toSet() }

    override val size: Int get() = list.size

    /** Returns the value of [distinct], forcing its computation if needed. */
    override fun toSet(): Set<T> = distinct.value

    /** Returns the backing list itself; no copy is made. */
    override fun toList(): List<T> = list

    override fun get(index: Int): T = list[index]

    /**
     * Returns a view of the elements at the indices in [range] (both ends inclusive).
     *
     * An empty [range] (for instance [IntRange.EMPTY]) yields an empty list instead of being
     * passed to `subList`, where its bounds could lie outside the list and throw. This matches
     * the behavior of the standard library's `List.slice(IntRange)`.
     */
    override fun get(range: IntRange): List<T> =
        if (range.isEmpty()) emptyList() else list.subList(range.first, range.last + 1)

    override fun contains(value: T): Boolean = list.contains(value)

    override fun iterator(): Iterator<T> = list.iterator()

    companion object {

        /**
         * Constructs a [ColumnDataHolder] from the collection [list].
         *
         * - If [list] already is a [ColumnDataHolder], it is returned as is; [type] and [distinct]
         *   are ignored in that case.
         * - If [type] equals one of the primitive or unsigned types (`Boolean`, `Byte`, ..., `ULong`),
         *   the elements are copied into the matching primitive array, which is then wrapped
         *   with `asList`.
         * - Otherwise [asList] is applied to [list] directly.
         *
         * @throws IllegalArgumentException if the conversion fails, for example because the
         *   elements do not match [type]. The original exception is attached as the cause.
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(list: Collection<T>, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
            if (list is ColumnDataHolder<*>) return list as ColumnDataHolder<T>

            try {
                val newList = when (type) {
                    BOOLEAN -> (list as Collection<Boolean>).toBooleanArray().asList()
                    BYTE -> (list as Collection<Byte>).toByteArray().asList()
                    SHORT -> (list as Collection<Short>).toShortArray().asList()
                    INT -> (list as Collection<Int>).toIntArray().asList()
                    LONG -> (list as Collection<Long>).toLongArray().asList()
                    FLOAT -> (list as Collection<Float>).toFloatArray().asList()
                    DOUBLE -> (list as Collection<Double>).toDoubleArray().asList()
                    CHAR -> (list as Collection<Char>).toCharArray().asList()
                    UBYTE -> (list as Collection<UByte>).toUByteArray().asList()
                    USHORT -> (list as Collection<UShort>).toUShortArray().asList()
                    UINT -> (list as Collection<UInt>).toUIntArray().asList()
                    ULONG -> (list as Collection<ULong>).toULongArray().asList()
                    else -> list.asList()
                } as List<T>

                return ColumnDataHolderImpl(newList, distinct)
            } catch (e: Exception) {
                throw IllegalArgumentException("Can't create ColumnDataHolder from $list and type $type", e)
            }
        }

        /**
         * Constructs a [ColumnDataHolder] from the boxed [array].
         *
         * If [type] equals one of the primitive or unsigned types, [array] is first converted to
         * the matching primitive array and then wrapped with `asList`; otherwise `asList` is
         * applied to [array] directly.
         *
         * @throws IllegalArgumentException if the conversion fails, for example because the
         *   elements do not match [type]. The original exception is attached as the cause.
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(array: Array<T>, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
            try {
                val list = when (type) {
                    BOOLEAN -> (array as Array<Boolean>).toBooleanArray().asList()
                    BYTE -> (array as Array<Byte>).toByteArray().asList()
                    SHORT -> (array as Array<Short>).toShortArray().asList()
                    INT -> (array as Array<Int>).toIntArray().asList()
                    LONG -> (array as Array<Long>).toLongArray().asList()
                    FLOAT -> (array as Array<Float>).toFloatArray().asList()
                    DOUBLE -> (array as Array<Double>).toDoubleArray().asList()
                    CHAR -> (array as Array<Char>).toCharArray().asList()
                    UBYTE -> (array as Array<UByte>).toUByteArray().asList()
                    USHORT -> (array as Array<UShort>).toUShortArray().asList()
                    UINT -> (array as Array<UInt>).toUIntArray().asList()
                    ULONG -> (array as Array<ULong>).toULongArray().asList()
                    else -> array.asList()
                } as List<T>

                return ColumnDataHolderImpl(list, distinct)
            } catch (e: Exception) {
                throw IllegalArgumentException(
                    "Can't create ColumnDataHolder from $array and mismatching type $type",
                    e
                )
            }
        }

        /**
         * Constructs a [ColumnDataHolder] by wrapping [primitiveArray] with `asList`.
         *
         * [primitiveArray] must be a primitive or unsigned array (`BooleanArray`, ..., `ULongArray`)
         * whose element type equals [type].
         *
         * @throws IllegalArgumentException if [primitiveArray] is not a primitive array, or if it
         *   is one but does not match [type].
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(primitiveArray: Any, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
            val newList = when {
                type == BOOLEAN && primitiveArray is BooleanArray -> primitiveArray.asList()
                type == BYTE && primitiveArray is ByteArray -> primitiveArray.asList()
                type == SHORT && primitiveArray is ShortArray -> primitiveArray.asList()
                type == INT && primitiveArray is IntArray -> primitiveArray.asList()
                type == LONG && primitiveArray is LongArray -> primitiveArray.asList()
                type == FLOAT && primitiveArray is FloatArray -> primitiveArray.asList()
                type == DOUBLE && primitiveArray is DoubleArray -> primitiveArray.asList()
                type == CHAR && primitiveArray is CharArray -> primitiveArray.asList()
                type == UBYTE && primitiveArray is UByteArray -> primitiveArray.asList()
                type == USHORT && primitiveArray is UShortArray -> primitiveArray.asList()
                type == UINT && primitiveArray is UIntArray -> primitiveArray.asList()
                type == ULONG && primitiveArray is ULongArray -> primitiveArray.asList()
                // Distinguish "not a primitive array at all" from "primitive array of the wrong type".
                !primitiveArray.isPrimitiveArray -> throw IllegalArgumentException(
                    "Can't create ColumnDataHolder from non primitive array $primitiveArray and type $type"
                )

                else -> throw IllegalArgumentException(
                    "Can't create ColumnDataHolder from primitive array $primitiveArray and type $type"
                )
            } as List<T>

            return ColumnDataHolderImpl(newList, distinct)
        }
    }
}
123+
124+
// KType constants for the primitive and unsigned element types that the `of` factory
// functions in this file compare their `type` argument against.
private val BOOLEAN: KType = typeOf<Boolean>()
private val BYTE: KType = typeOf<Byte>()
private val SHORT: KType = typeOf<Short>()
private val INT: KType = typeOf<Int>()
private val LONG: KType = typeOf<Long>()
private val FLOAT: KType = typeOf<Float>()
private val DOUBLE: KType = typeOf<Double>()
private val CHAR: KType = typeOf<Char>()
private val UBYTE: KType = typeOf<UByte>()
private val USHORT: KType = typeOf<UShort>()
private val UINT: KType = typeOf<UInt>()
private val ULONG: KType = typeOf<ULong>()
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
@file:OptIn(ExperimentalUnsignedTypes::class)
2+
3+
package org.jetbrains.kotlinx.dataframe
4+
5+
import org.jetbrains.kotlinx.dataframe.impl.columns.BOOLEAN
6+
import org.jetbrains.kotlinx.dataframe.impl.columns.BYTE
7+
import org.jetbrains.kotlinx.dataframe.impl.columns.CHAR
8+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnDataHolderImpl
9+
import org.jetbrains.kotlinx.dataframe.impl.columns.DOUBLE
10+
import org.jetbrains.kotlinx.dataframe.impl.columns.FLOAT
11+
import org.jetbrains.kotlinx.dataframe.impl.columns.INT
12+
import org.jetbrains.kotlinx.dataframe.impl.columns.LONG
13+
import org.jetbrains.kotlinx.dataframe.impl.columns.SHORT
14+
import org.jetbrains.kotlinx.dataframe.impl.columns.UBYTE
15+
import org.jetbrains.kotlinx.dataframe.impl.columns.UINT
16+
import org.jetbrains.kotlinx.dataframe.impl.columns.ULONG
17+
import org.jetbrains.kotlinx.dataframe.impl.columns.USHORT
18+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofBoxedArray
19+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofCollection
20+
import org.jetbrains.kotlinx.dataframe.impl.columns.ofPrimitiveArray
21+
import kotlin.reflect.KType
22+
import kotlin.reflect.typeOf
23+
24+
/**
25+
* Represents the contents of a column; however, it may be implemented.
26+
* The default implementation is found at [ColumnDataHolderImpl].
27+
*/
28+
public interface ColumnDataHolder<T> : List<T> {
29+
30+
public fun toSet(): Set<T>
31+
32+
public operator fun get(range: IntRange): List<T>
33+
34+
public fun add(element: T)
35+
36+
public fun canAdd(element: T): Boolean
37+
38+
public val distinct: Lazy<Set<T>>
39+
40+
public companion object
41+
}
42+
43+
/**
 * Creates a [ColumnDataHolder] from this collection by delegating to `ColumnDataHolder.ofCollection`.
 *
 * @param type the [KType] of the elements; forwarded unchanged.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Collection<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolder.ofCollection(this, type, distinct)
}

/**
 * Creates a [ColumnDataHolder] from this collection, using the reified [T] as the element type.
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Collection<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
48+
49+
/**
 * Creates a [ColumnDataHolder] from this boxed array by delegating to `ColumnDataHolder.ofBoxedArray`.
 *
 * @param type the [KType] of the elements; forwarded unchanged.
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public fun <T> Array<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    return ColumnDataHolder.ofBoxedArray(this, type, distinct)
}

/**
 * Creates a [ColumnDataHolder] from this boxed array, using the reified [T] as the element type.
 *
 * @param distinct optional precomputed set of distinct values; forwarded unchanged.
 */
public inline fun <reified T> Array<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(elementType, distinct)
}
54+
55+
// Each function below hands the primitive array, together with the matching KType constant,
// to ColumnDataHolder.ofPrimitiveArray; `distinct` is forwarded unchanged.

/** Creates a [ColumnDataHolder] of [Boolean] from this [BooleanArray]. */
public fun BooleanArray.asColumnDataHolder(distinct: Lazy<Set<Boolean>>? = null): ColumnDataHolder<Boolean> {
    return ColumnDataHolder.ofPrimitiveArray(this, BOOLEAN, distinct)
}

/** Creates a [ColumnDataHolder] of [Byte] from this [ByteArray]. */
public fun ByteArray.asColumnDataHolder(distinct: Lazy<Set<Byte>>? = null): ColumnDataHolder<Byte> {
    return ColumnDataHolder.ofPrimitiveArray(this, BYTE, distinct)
}

/** Creates a [ColumnDataHolder] of [Short] from this [ShortArray]. */
public fun ShortArray.asColumnDataHolder(distinct: Lazy<Set<Short>>? = null): ColumnDataHolder<Short> {
    return ColumnDataHolder.ofPrimitiveArray(this, SHORT, distinct)
}

/** Creates a [ColumnDataHolder] of [Int] from this [IntArray]. */
public fun IntArray.asColumnDataHolder(distinct: Lazy<Set<Int>>? = null): ColumnDataHolder<Int> {
    return ColumnDataHolder.ofPrimitiveArray(this, INT, distinct)
}

/** Creates a [ColumnDataHolder] of [Long] from this [LongArray]. */
public fun LongArray.asColumnDataHolder(distinct: Lazy<Set<Long>>? = null): ColumnDataHolder<Long> {
    return ColumnDataHolder.ofPrimitiveArray(this, LONG, distinct)
}

/** Creates a [ColumnDataHolder] of [Float] from this [FloatArray]. */
public fun FloatArray.asColumnDataHolder(distinct: Lazy<Set<Float>>? = null): ColumnDataHolder<Float> {
    return ColumnDataHolder.ofPrimitiveArray(this, FLOAT, distinct)
}

/** Creates a [ColumnDataHolder] of [Double] from this [DoubleArray]. */
public fun DoubleArray.asColumnDataHolder(distinct: Lazy<Set<Double>>? = null): ColumnDataHolder<Double> {
    return ColumnDataHolder.ofPrimitiveArray(this, DOUBLE, distinct)
}

/** Creates a [ColumnDataHolder] of [Char] from this [CharArray]. */
public fun CharArray.asColumnDataHolder(distinct: Lazy<Set<Char>>? = null): ColumnDataHolder<Char> {
    return ColumnDataHolder.ofPrimitiveArray(this, CHAR, distinct)
}
78+
79+
// Unsigned-array counterparts: each hands the array, together with the matching KType constant,
// to ColumnDataHolder.ofPrimitiveArray; `distinct` is forwarded unchanged.

/** Creates a [ColumnDataHolder] of [UByte] from this [UByteArray]. */
public fun UByteArray.asColumnDataHolder(distinct: Lazy<Set<UByte>>? = null): ColumnDataHolder<UByte> {
    return ColumnDataHolder.ofPrimitiveArray(this, UBYTE, distinct)
}

/** Creates a [ColumnDataHolder] of [UShort] from this [UShortArray]. */
public fun UShortArray.asColumnDataHolder(distinct: Lazy<Set<UShort>>? = null): ColumnDataHolder<UShort> {
    return ColumnDataHolder.ofPrimitiveArray(this, USHORT, distinct)
}

/** Creates a [ColumnDataHolder] of [UInt] from this [UIntArray]. */
public fun UIntArray.asColumnDataHolder(distinct: Lazy<Set<UInt>>? = null): ColumnDataHolder<UInt> {
    return ColumnDataHolder.ofPrimitiveArray(this, UINT, distinct)
}

/** Creates a [ColumnDataHolder] of [ULong] from this [ULongArray]. */
public fun ULongArray.asColumnDataHolder(distinct: Lazy<Set<ULong>>? = null): ColumnDataHolder<ULong> {
    return ColumnDataHolder.ofPrimitiveArray(this, ULONG, distinct)
}

0 commit comments

Comments
 (0)