From e41f3372162bed006e9e73b6b4fbd2e7bf024af8 Mon Sep 17 00:00:00 2001 From: Emmanuel Berkowicz Date: Sun, 29 Jun 2025 17:27:42 +1000 Subject: [PATCH 1/3] Parse String to UUID #1006 ## Summary Implements UUID parsing support for DataColumn.parse() as per open issue #1006 ## Changes - Added UUID parser to `parsersOrder` list in `main/../parse.kt` using `java.util.UUID.fromString()` with exception handling - Added test cases in `test/../ParseTests.kt`: - Valid UUID strings are parsed to UUID objects - Invalid UUID strings remain String type ## Testing - Both positive and negative test cases pass - Unable to run full test suite due to Java version compatibility issue with simple-git plugin (unrelated to this change) --- .../kotlinx/dataframe/impl/api/parse.kt | 12 ++++++++++ .../jetbrains/kotlinx/dataframe/api/parse.kt | 23 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 04fb4d4ea0..59be1a9cbd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -49,6 +49,7 @@ import java.time.format.DateTimeFormatterBuilder import java.time.temporal.Temporal import java.time.temporal.TemporalQuery import java.util.Locale +import java.util.UUID import kotlin.properties.Delegates import kotlin.reflect.KClass import kotlin.reflect.KType @@ -62,6 +63,8 @@ import java.time.LocalDate as JavaLocalDate import java.time.LocalDateTime as JavaLocalDateTime import java.time.LocalTime as JavaLocalTime + + private val logger = KotlinLogging.logger { } internal interface StringParser { @@ -491,6 +494,15 @@ internal object Parsers : GlobalParserOptions { posixParserToDoubleWithOptions, // Boolean stringParser<Boolean> { it.toBooleanOrNull() }, + //UUID + stringParser<UUID> {str -> + try{ + UUID.fromString(str) + } catch(e: 
IllegalArgumentException){ + null + } + }, + // BigInteger stringParser<BigInteger> { it.toBigIntegerOrNull() }, // BigDecimal diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index 081e28d078..0b34ce7d51 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.api import io.kotest.matchers.should import io.kotest.matchers.shouldBe +import io.kotest.matchers.shouldNotBe import kotlinx.datetime.DateTimeUnit import kotlinx.datetime.Instant import kotlinx.datetime.LocalDate @@ -18,6 +19,7 @@ import org.jetbrains.kotlinx.dataframe.impl.catchSilent import org.jetbrains.kotlinx.dataframe.type import org.junit.Test import java.util.Locale +import java.util.UUID import kotlin.random.Random import kotlin.reflect.typeOf import kotlin.time.Duration @@ -481,6 +483,27 @@ class ParseTests { df.parse() } + @Test + fun `parse valid UUID`() { + val uuidString = "550e8400-e29b-41d4-a716-446655440000" + val column by columnOf(uuidString) + val parsed = column.parse() + + parsed.type() shouldBe typeOf<UUID>() + (parsed[0] as UUID).toString() shouldBe uuidString + } + + @Test + fun `parse invalid UUID`(){ + val invalidUUID = "this is not a UUID" + val column = columnOf(invalidUUID) + val parsed = column.tryParse() // tryParse as string is not formatted. 
+ + parsed.type() shouldNotBe typeOf<UUID>() + parsed.type() shouldBe typeOf<String>() + } + + + /** * Asserts that all elements of the iterable are equal to each other */ From 8f194d5e999da0c0409608b8b8e845f1f5c55b59 Mon Sep 17 00:00:00 2001 From: Emmanuel Berkowicz Date: Tue, 1 Jul 2025 19:15:55 +1000 Subject: [PATCH 2/3] Parse String to UUID #1006_RevB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse.kt ### Changes Made - **Added regex pre-validation** - **Restructured logic** so `UUID.fromString()` is only called when regex matches - **Exceptions are no longer default flow** - only thrown in rare edge cases where regex passes but UUID parsing fails - **Maintained safety** with try/catch around `UUID.fromString()` ### Testing - All existing test cases unchanged and continue to pass - Positive test: valid UUID strings → UUID objects - Negative test: invalid strings → remain String type --- .../kotlinx/dataframe/impl/api/parse.kt | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 59be1a9cbd..32b1f623b0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -63,8 +63,6 @@ import java.time.LocalDate as JavaLocalDate import java.time.LocalDateTime as JavaLocalDateTime import java.time.LocalTime as JavaLocalTime - - private val logger = KotlinLogging.logger { } internal interface StringParser { @@ -494,15 +492,21 @@ internal object Parsers : GlobalParserOptions { posixParserToDoubleWithOptions, // Boolean stringParser<Boolean> { it.toBooleanOrNull() }, - //UUID - stringParser<UUID> {str -> - try{ - UUID.fromString(str) - } catch(e: IllegalArgumentException){ + // UUID + stringParser<UUID> { str -> + + val uuidRegex = 
Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}") + + if (uuidRegex.matches(str)) { + try { + UUID.fromString(str) + } catch (e: IllegalArgumentException) { + null + } + } else { null } }, - // BigInteger stringParser<BigInteger> { it.toBigIntegerOrNull() }, // BigDecimal From a9d7e91fcfde33e4fd42238a6bbca4eec4111ffd Mon Sep 17 00:00:00 2001 From: Emmanuel Berkowicz Date: Thu, 3 Jul 2025 19:53:29 +1000 Subject: [PATCH 3/3] Parse String to UUID #1006 Use kotlin.uuid.Uuid instead of java.util.UUID; change implemented in both test and main parse.kt --- .../kotlinx/dataframe/impl/api/parse.kt | 8 ++++-- .../jetbrains/kotlinx/dataframe/api/parse.kt | 28 ++++++++++--------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 32b1f623b0..0999f76337 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -49,7 +49,6 @@ import java.time.format.DateTimeFormatterBuilder import java.time.temporal.Temporal import java.time.temporal.TemporalQuery import java.util.Locale -import java.util.UUID import kotlin.properties.Delegates import kotlin.reflect.KClass import kotlin.reflect.KType @@ -57,6 +56,8 @@ import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure import kotlin.reflect.typeOf import kotlin.time.Duration +import kotlin.uuid.ExperimentalUuidApi +import kotlin.uuid.Uuid import java.time.Duration as JavaDuration import java.time.Instant as JavaInstant import java.time.LocalDate as JavaLocalDate @@ -427,6 +428,7 @@ internal object Parsers : GlobalParserOptions { } } + @OptIn(ExperimentalUuidApi::class) internal val parsersOrder = listOf( // Int stringParser<Int> { it.toIntOrNull() }, @@ -493,13 +495,13 @@ internal object Parsers : GlobalParserOptions { // Boolean 
stringParser<Boolean> { it.toBooleanOrNull() }, // UUID - stringParser<UUID> { str -> + stringParser<Uuid> { str -> val uuidRegex = Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}") if (uuidRegex.matches(str)) { try { - UUID.fromString(str) + Uuid.parse(str) } catch (e: IllegalArgumentException) { null } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index 0b34ce7d51..dc2d2bc495 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -19,7 +19,6 @@ import org.jetbrains.kotlinx.dataframe.impl.catchSilent import org.jetbrains.kotlinx.dataframe.type import org.junit.Test import java.util.Locale -import java.util.UUID import kotlin.random.Random import kotlin.reflect.typeOf import kotlin.time.Duration @@ -30,6 +29,8 @@ import kotlin.time.Duration.Companion.milliseconds import kotlin.time.Duration.Companion.minutes import kotlin.time.Duration.Companion.nanoseconds import kotlin.time.Duration.Companion.seconds +import kotlin.uuid.ExperimentalUuidApi +import kotlin.uuid.Uuid import java.time.Duration as JavaDuration import java.time.Instant as JavaInstant @@ -483,27 +484,28 @@ class ParseTests { df.parse() } + @OptIn(ExperimentalUuidApi::class) @Test - fun `parse valid UUID`() { - val uuidString = "550e8400-e29b-41d4-a716-446655440000" - val column by columnOf(uuidString) + fun `parse valid Uuid`() { + val validUUID = "550e8400-e29b-41d4-a716-446655440000" + val column by columnOf(validUUID) val parsed = column.parse() - parsed.type() shouldBe typeOf<UUID>() - (parsed[0] as UUID).toString() shouldBe uuidString + parsed.type() shouldBe typeOf<Uuid>() + (parsed[0] as Uuid).toString() shouldBe validUUID // Change UUID to Uuid } + @OptIn(ExperimentalUuidApi::class) @Test - fun `parse invalid UUID`(){ - val invalidUUID = "this is not a UUID" - val column = columnOf(invalidUUID) - 
val parsed = column.tryParse() // tryParse as string is not formatted. + fun `parse invalid Uuid`() { + val invalidUUID = "this is not a UUID" + val column = columnOf(invalidUUID) + val parsed = column.tryParse() // tryParse as string is not formatted. - parsed.type() shouldNotBe typeOf<UUID>() - parsed.type() shouldBe typeOf<String>() + parsed.type() shouldNotBe typeOf<Uuid>() + parsed.type() shouldBe typeOf<String>() } - /** * Asserts that all elements of the iterable are equal to each other */