Skip to content

Commit 3689c11

Browse files
committed
Sort df.compileTimeSchema() columns according to df.schema() so they're easier to compare
1 parent b116126 commit 3689c11

File tree

4 files changed

+80
-3
lines changed

4 files changed

+80
-3
lines changed

core/api/core.api

+4
Original file line numberDiff line numberDiff line change
@@ -10130,6 +10130,10 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/MapKt {
1013010130
public static final fun mapNotNullValues (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
1013110131
}
1013210132

10133+
public final class org/jetbrains/kotlinx/dataframe/impl/api/SchemaKt {
10134+
public static final fun compileTimeSchemaImpl (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;Lkotlin/reflect/KClass;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
10135+
}
10136+
1013310137
public final class org/jetbrains/kotlinx/dataframe/impl/api/ToDataFrameKt {
1013410138
public static final fun convertToDataFrame (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Ljava/util/List;Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;I)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
1013510139
public static final fun createDataFrameImpl (Ljava/lang/Iterable;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/schema.kt

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ package org.jetbrains.kotlinx.dataframe.api
33
import org.jetbrains.kotlinx.dataframe.AnyFrame
44
import org.jetbrains.kotlinx.dataframe.AnyRow
55
import org.jetbrains.kotlinx.dataframe.DataFrame
6+
import org.jetbrains.kotlinx.dataframe.impl.api.compileTimeSchemaImpl
67
import org.jetbrains.kotlinx.dataframe.impl.owner
78
import org.jetbrains.kotlinx.dataframe.impl.schema.extractSchema
8-
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
99
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
1010

1111
// region DataRow
@@ -26,5 +26,5 @@ public fun GroupBy<*, *>.schema(): DataFrameSchema = toDataFrame().schema()
2626

2727
// endregion
2828

29-
/**
 * Returns the schema produced by `getSchema` for the compile-time type [T].
 *
 * The receiver is only used to infer [T]; its runtime columns are not read,
 * which is why the unused-receiver warning is suppressed.
 */
@Suppress("UnusedReceiverParameter")
30-
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema = getSchema(T::class)
29+
/**
 * Returns the schema derived from the compile-time type [T], with its columns
 * arranged in the order of this data frame's runtime [schema].
 *
 * Keeping both schemas in the same column order makes them easier to compare.
 */
public inline fun <reified T> DataFrame<T>.compileTimeSchema(): DataFrameSchema {
    val runtimeSchema = schema()
    return compileTimeSchemaImpl(runtimeSchema, T::class)
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package org.jetbrains.kotlinx.dataframe.impl.api
2+
3+
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
4+
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
5+
import org.jetbrains.kotlinx.dataframe.impl.schema.getSchema
6+
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
7+
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
8+
import kotlin.reflect.KClass
9+
10+
/**
 * Builds the schema of [klass] via `getSchema` and reorders its columns to follow [runtimeSchema].
 *
 * The position of every column in [runtimeSchema] (including columns of nested schemas)
 * is collected into a map keyed by column path, and that map drives the sorting.
 *
 * @param runtimeSchema schema whose column order should be reproduced.
 * @param klass class the compile-time schema is built from.
 * @return the schema of [klass] with columns sorted by their position in [runtimeSchema].
 */
@PublishedApi
internal fun compileTimeSchemaImpl(runtimeSchema: DataFrameSchema, klass: KClass<*>): DataFrameSchema {
    val declaredSchema = getSchema(klass)
    val rootPath = ColumnPath(emptyList())
    val positions = mutableMapOf<ColumnPath, Int>().also { collected ->
        runtimeSchema.putColumnsOrder(collected, path = rootPath)
    }
    return declaredSchema.sorted(positions, path = rootPath)
}
18+
19+
/**
 * Records the position of every column of this schema in [order].
 *
 * Each column is stored under its full path ([path] plus the column name) with its index
 * among its siblings as the value. Schemas nested inside [ColumnSchema.Frame] and
 * [ColumnSchema.Group] columns are visited recursively, using the column's path as prefix.
 *
 * @param order map that receives the `path -> index` entries.
 * @param path path of the schema currently being visited.
 */
internal fun DataFrameSchema.putColumnsOrder(order: MutableMap<ColumnPath, Int>, path: ColumnPath) {
    for ((index, entry) in columns.entries.withIndex()) {
        val childPath = path + entry.key
        order[childPath] = index
        // Only frame and group columns carry a nested schema to descend into.
        val nestedSchema = when (val child = entry.value) {
            is ColumnSchema.Frame -> child.schema
            is ColumnSchema.Group -> child.schema
            else -> null
        }
        nestedSchema?.putColumnsOrder(order, childPath)
    }
}
34+
35+
/**
 * Returns a copy of this schema whose columns are reordered according to [order].
 *
 * Nested schemas of [ColumnSchema.Frame] and [ColumnSchema.Group] columns are sorted
 * recursively; the kind of each column is preserved (a frame column stays a frame column,
 * keeping its nullability and content type).
 *
 * Columns whose path has no entry in [order] get a `null` sort key; `sortedBy` orders
 * `null` keys before all others, so such columns end up first. The sort is stable, so
 * columns with equal keys keep their relative order.
 *
 * @param order position of each column, keyed by full column path.
 * @param path path of this schema inside the root schema.
 * @throws NotImplementedError if a column is of an unknown [ColumnSchema] subclass.
 */
internal fun DataFrameSchema.sorted(order: Map<ColumnPath, Int>, path: ColumnPath): DataFrameSchema {
    val sorted = columns.map { (name, column) ->
        val columnPath = path + name
        name to when (column) {
            // Rebuild as Frame (not Group) so the column kind and nullability survive sorting.
            is ColumnSchema.Frame -> ColumnSchema.Frame(
                column.schema.sorted(order, columnPath),
                column.nullable,
                column.contentType
            )

            is ColumnSchema.Group -> ColumnSchema.Group(column.schema.sorted(order, columnPath), column.contentType)

            is ColumnSchema.Value -> column

            else -> TODO("unexpected ColumnSchema class ${column::class}")
        }
    }.sortedBy { (name, _) ->
        order[path + name]
    }.toMap()
    return DataFrameSchemaImpl(sorted)
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package org.jetbrains.kotlinx.dataframe.api
2+
3+
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.DataRow
5+
import org.junit.Test
6+
7+
/** Tests for the schema API of data frames. */
class SchemaTests {
    /**
     * The compile-time schema must render exactly like the runtime schema, even though
     * the properties of `Schema` and `Nested` are declared in a different order than the
     * columns of the data frame. Rendered strings are compared so that column order matters.
     */
    @Test
    fun `columns order test`() {
        val nestedRow = dataFrameOf("c", "b")(4, 5).first()
        val typed = dataFrameOf("abc", "a", "a123", "nested")(1, 2, 3, nestedRow).cast<Schema>()

        val runtimeRendering = typed.schema().toString()
        val compileTimeRendering = typed.compileTimeSchema().toString()

        runtimeRendering shouldBe compileTimeRendering
    }
}
15+
16+
// Compile-time schema used by `columns order test`.
// Its properties are declared in a different order (a, abc, a123, nested) than the
// columns of the data frame built in the test ("abc", "a", "a123", "nested").
private interface Schema {
17+
val a: Int
18+
val abc: Int
19+
val a123: Int
20+
// Row-typed column whose nested columns are described by Nested.
val nested: DataRow<Nested>
21+
}
22+
23+
// Compile-time schema of the `nested` column of Schema.
// Its properties are declared as (b, c), while the row built in the test has columns ("c", "b").
private interface Nested {
24+
val b: Int
25+
val c: Int
26+
}

0 commit comments

Comments
 (0)