Skip to content

Commit 36d6acc

Browse files
committed
Introduce parsing of `Char?` columns. It works the same as `String` parsing, except that the result is never a `Char` column and parsing can never fail (since the values can always fall back to `String`).
1 parent 5c54f58 commit 36d6acc

File tree

6 files changed

+75
-9
lines changed

6 files changed

+75
-9
lines changed

core/api/core.api

+4
Original file line numberDiff line numberDiff line change
@@ -6501,8 +6501,12 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParseKt {
65016501
public static synthetic fun parse$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
65026502
public static final fun parseAnyFrameNullable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
65036503
public static synthetic fun parseAnyFrameNullable$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
6504+
public static final fun parseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
6505+
public static synthetic fun parseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
65046506
public static final fun tryParse (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
65056507
public static synthetic fun tryParse$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
6508+
public static final fun tryParseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
6509+
public static synthetic fun tryParseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
65066510
}
65076511

65086512
public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

+40
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import java.time.format.DateTimeFormatter
1818
import java.util.Locale
1919
import kotlin.reflect.KProperty
2020
import kotlin.reflect.KType
21+
import kotlin.reflect.typeOf
2122

2223
/**
2324
* ### Global Parser Options
@@ -197,6 +198,28 @@ public class ParserOptions(
197198
/** @include [tryParseImpl] */
198199
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)
199200

201+
/**
202+
* Tries to parse a column of chars, converted to strings, into a column of a different type.
203+
* Each parser in [Parsers] is run in order until a valid parser is found,
204+
* a.k.a. that parser was able to parse all values in the column successfully. If a parser
205+
* fails to parse any value, the next parser is tried. If all the others fail, the final parser
206+
* returns strings.
207+
*
208+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
209+
*
210+
* @param options options for parsing, like providing a locale or a custom date-time formatter
211+
* @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
212+
* @return a new column with parsed values
213+
*/
214+
@JvmName("tryParseChar")
215+
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
216+
// skip the Char parser, as we're trying to parse away from Char
217+
val providedSkipTypes = options?.skipTypes ?: DataFrame.parser.skipTypes
218+
val parserOptions = (options ?: ParserOptions()).copy(skipTypes = providedSkipTypes + typeOf<Char>())
219+
220+
return map { it?.toString() }.tryParse(parserOptions)
221+
}
222+
200223
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
201224
parse(options) {
202225
colsAtAnyDepth { !it.isColumnGroup() }
@@ -220,6 +243,23 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
220243
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
221244
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
222245

246+
/**
247+
* Tries to parse a column of chars as strings into a column of a different type.
248+
* Each parser in [Parsers] is run in order until a valid parser is found,
249+
* a.k.a. that parser was able to parse all values in the column successfully. If a parser
250+
* fails to parse any value, the next parser is tried.
251+
*
252+
* If all parsers fail, the column is returned as a `String` column; unlike the `String?` overload, no exception is thrown in that case.
253+
*
254+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
255+
*
256+
* @param options options for parsing, like providing a locale or a custom date-time formatter
257+
* @return a new column with parsed values
258+
*/
259+
@JvmName("parseChar")
260+
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> =
261+
tryParse(options) // no need to throw an exception, as Char can always be parsed as String
262+
223263
@JvmName("parseAnyFrameNullable")
224264
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =
225265
map { it?.parse(options) }

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

+8-9
Original file line numberDiff line numberDiff line change
@@ -564,29 +564,28 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
564564
when {
565565
// when a frame column is requested to be parsed,
566566
// parse each value/frame column at any depth inside each DataFrame in the frame column
567-
col.isFrameColumn() -> {
567+
col.isFrameColumn() ->
568568
col.map {
569569
it.parseImpl(options) {
570570
colsAtAnyDepth { !it.isColumnGroup() }
571571
}
572572
}
573-
}
574573

575574
// when a column group is requested to be parsed,
576575
// parse each column in the group
577-
col.isColumnGroup() -> {
576+
col.isColumnGroup() ->
578577
col.parseImpl(options) { all() }
579578
.asColumnGroup(col.name())
580579
.asDataColumn()
581-
}
580+
581+
// Base case, parse the column as String if it's a `Char?` column
582+
col.isSubtypeOf<Char?>() ->
583+
col.cast<Char?>().map { it?.toString() }.tryParseImpl(options)
582584

583585
// Base case, parse the column if it's a `String?` column
584-
col.isSubtypeOf<String?>() -> {
586+
col.isSubtypeOf<String?>() ->
585587
col.cast<String?>().tryParseImpl(options)
586-
}
587588

588-
else -> {
589-
col
590-
}
589+
else -> col
591590
}
592591
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

+2
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ class ConvertTests {
221221

222222
// but
223223
columnOf('1', '2').convertToString().convertToInt() shouldBe columnOf(1, 2)
224+
// or
225+
columnOf('1', '2').parse() shouldBe columnOf(1, 2)
224226
}
225227

226228
@Test

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

+15
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,21 @@ import java.time.Duration as JavaDuration
3232
import java.time.Instant as JavaInstant
3333

3434
class ParseTests {
35+
36+
@Test
37+
fun `parse chars to string`() {
38+
val char = columnOf('a', 'b', 'c')
39+
char.parse() shouldBe columnOf("a", "b", "c")
40+
char.tryParse() shouldBe columnOf("a", "b", "c")
41+
}
42+
43+
@Test
44+
fun `parse chars to int`() {
45+
val char = columnOf('1', '2', '3')
46+
char.parse() shouldBe columnOf(1, 2, 3)
47+
char.tryParse() shouldBe columnOf(1, 2, 3)
48+
}
49+
3550
@Test
3651
fun parseDate() {
3752
val currentLocale = Locale.getDefault()

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt

+6
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ class ParserTests {
4646
DataFrame.parser.resetToDefault()
4747
}
4848

49+
@Test
50+
fun `parse to Char`() {
51+
val col by columnOf("a", "b")
52+
col.parse().type() shouldBe typeOf<Char>()
53+
}
54+
4955
@Test(expected = IllegalStateException::class)
5056
fun `parse should throw`() {
5157
val col by columnOf("a", "bc")

0 commit comments

Comments
 (0)