Skip to content

Commit bd63005

Browse files
OptimumCode and cketti authored
Add extensions functions to iterate over codepoints (#38)
Co-authored-by: cketti <[email protected]>
1 parent 91bed49 commit bd63005

File tree

4 files changed

+161
-0
lines changed

4 files changed

+161
-0
lines changed

kotlin-codepoints-deluxe/src/commonMain/kotlin/CharSequenceExtensions.kt

+23
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ package de.cketti.codepoints.deluxe
66

77
import de.cketti.codepoints.codePointAt as intCodePointAt
88
import de.cketti.codepoints.codePointBefore as intCodePointBefore
9+
import de.cketti.codepoints.forEachCodePoint as intForEachCodePoint
10+
import de.cketti.codepoints.forEachCodePointIndexed as intForEachCodePointIndexed
911

1012
/**
1113
* Returns the Unicode code point at the specified index.
@@ -51,3 +53,24 @@ fun CharSequence.codePointSequence(): CodePointSequence {
5153
fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator {
5254
return CodePointIterator(this, startIndex, endIndex)
5355
}
56+
57+
/**
 * Calls [action] once for every code point in this character sequence, handing each one over as a [CodePoint].
 */
inline fun CharSequence.forEachCodePoint(action: (codePoint: CodePoint) -> Unit) {
    // Delegate to the Int-based variant and wrap each raw value before passing it on.
    intForEachCodePoint { rawCodePoint -> action(rawCodePoint.toCodePoint()) }
}
65+
66+
/**
 * Calls [action] once for every code point in this character sequence, together with its start index.
 *
 * @param action Receives the start index of the current code point as its first argument and the code point itself,
 * as a [CodePoint] instance, as its second argument.
 */
inline fun CharSequence.forEachCodePointIndexed(action: (index: Int, codePoint: CodePoint) -> Unit) {
    // Delegate to the Int-based variant and wrap each raw value before passing it on.
    intForEachCodePointIndexed { startIndex, rawCodePoint ->
        val codePoint = rawCodePoint.toCodePoint()
        action(startIndex, codePoint)
    }
}

kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt

+49
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,53 @@ class CharSequenceExtensionsTest {
5757
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(1))
5858
assertEquals(0xD83E.toCodePoint(), "\uD83E\uDD95\uD83E\uDD96".codePointBefore(3))
5959
}
60+
61+
@Test
fun forEachCodepoint() {
    // Gathers every code point reported by forEachCodePoint, in callback order.
    fun CharSequence.gatherCodePoints(): List<CodePoint> {
        val collected = mutableListOf<CodePoint>()
        forEachCodePoint { codePoint -> collected += codePoint }
        return collected
    }

    // Empty input: the action is never invoked.
    assertEquals(emptyList(), "".gatherCodePoints())

    // A single char yields a single code point.
    assertEquals(listOf('a'.toCodePoint()), "a".gatherCodePoints())

    // Non-surrogate chars are each reported as their own code point.
    assertEquals(listOf('a'.toCodePoint(), 0xFFFF.toCodePoint()), "a\uFFFF".gatherCodePoints())

    // Surrogate pairs are combined, both at the start and at the end of the input.
    assertEquals(
        listOf(0x1F995.toCodePoint(), 'a'.toCodePoint(), 0x1F996.toCodePoint()),
        "\uD83E\uDD95a\uD83E\uDD96".gatherCodePoints(),
    )
}
82+
83+
@Test
fun forEachCodepointIndexed() {
    // Gathers every (start index, code point) pair reported by forEachCodePointIndexed, in callback order.
    fun CharSequence.gatherIndexedCodePoints(): List<Pair<Int, CodePoint>> {
        val collected = mutableListOf<Pair<Int, CodePoint>>()
        forEachCodePointIndexed { index, codePoint -> collected += Pair(index, codePoint) }
        return collected
    }

    // Empty input: the action is never invoked.
    assertEquals(emptyList(), "".gatherIndexedCodePoints())

    // A single char is reported at index 0.
    assertEquals(listOf(0 to 'a'.toCodePoint()), "a".gatherIndexedCodePoints())

    // A surrogate pair following a single char starts at index 1.
    assertEquals(
        listOf(0 to 'a'.toCodePoint(), 1 to 0x1F995.toCodePoint()),
        "a\uD83E\uDD95".gatherIndexedCodePoints(),
    )

    // A leading surrogate pair occupies two chars, so the following code points start at indices 2 and 3.
    assertEquals(
        listOf(
            0 to 0x1F995.toCodePoint(),
            2 to 'a'.toCodePoint(),
            3 to 0x1F996.toCodePoint(),
        ),
        "\uD83E\uDD95a\uD83E\uDD96".gatherIndexedCodePoints(),
    )
}
60109
}

kotlin-codepoints/src/commonMain/kotlin/CharSequenceExtensions.kt

+40
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,43 @@ fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
160160
return currentIndex + 1
161161
}
162162
}
163+
164+
/**
 * Calls [action] once for every code point in this character sequence, passing the code point value as an [Int].
 */
inline fun CharSequence.forEachCodePoint(action: (codePoint: Int) -> Unit) {
    // Reuse the indexed traversal and simply drop the start index.
    forEachCodePointIndexed { _, value -> action(value) }
}
172+
173+
/**
 * Walks this character sequence from its first to its last char and calls [action] once per code point.
 *
 * A high surrogate that is immediately followed by a low surrogate is combined into a single code point. Any other
 * char, including a surrogate without a matching partner, is reported on its own using its char code.
 *
 * @param action Receives the start index of the current code point as its first argument and the code point value
 * as its second argument.
 */
inline fun CharSequence.forEachCodePointIndexed(action: (index: Int, codePoint: Int) -> Unit) {
    // The length is read once up front; the loop does not re-query it.
    val limit = length
    var position = 0
    while (position < limit) {
        val start = position
        val lead = this[position]
        position++

        // Assume a single-char code point until a complete surrogate pair is confirmed.
        var codePoint = lead.code
        if (lead.isHighSurrogate() && position < limit) {
            val trail = this[position]
            if (trail.isLowSurrogate()) {
                codePoint = CodePoints.toCodePoint(lead, trail)
                // Consume the low surrogate too, so the next iteration starts after the pair.
                position++
            }
        }

        action(start, codePoint)
    }
}

kotlin-codepoints/src/commonTest/kotlin/CharSequenceExtensionsTest.kt

+49
Original file line numberDiff line numberDiff line change
@@ -161,4 +161,53 @@ class CharSequenceExtensionsTest {
161161
"\uD83E\uDD95".offsetByCodePoints(index = 2, codePointOffset = -2)
162162
}
163163
}
164+
165+
@Test
fun forEachCodepoint() {
    // Gathers every code point value reported by forEachCodePoint, in callback order.
    fun CharSequence.gatherCodePoints(): List<Int> {
        val collected = mutableListOf<Int>()
        forEachCodePoint { codePoint -> collected += codePoint }
        return collected
    }

    // Empty input: the action is never invoked.
    assertEquals(emptyList(), "".gatherCodePoints())

    // A single char yields a single code point.
    assertEquals(listOf('a'.code), "a".gatherCodePoints())

    // Non-surrogate chars are each reported as their own code point.
    assertEquals(listOf('a'.code, 0xFFFF), "a\uFFFF".gatherCodePoints())

    // Surrogate pairs are combined, both at the start and at the end of the input.
    assertEquals(
        listOf(0x1F995, 'a'.code, 0x1F996),
        "\uD83E\uDD95a\uD83E\uDD96".gatherCodePoints(),
    )
}
186+
187+
@Test
fun forEachCodepointIndexed() {
    // Gathers every (start index, code point value) pair reported by forEachCodePointIndexed, in callback order.
    fun CharSequence.gatherIndexedCodePoints(): List<Pair<Int, Int>> {
        val collected = mutableListOf<Pair<Int, Int>>()
        forEachCodePointIndexed { index, codePoint -> collected += Pair(index, codePoint) }
        return collected
    }

    // Empty input: the action is never invoked.
    assertEquals(emptyList(), "".gatherIndexedCodePoints())

    // A single char is reported at index 0.
    assertEquals(listOf(0 to 'a'.code), "a".gatherIndexedCodePoints())

    // A surrogate pair following a single char starts at index 1.
    assertEquals(
        listOf(0 to 'a'.code, 1 to 0x1F995),
        "a\uD83E\uDD95".gatherIndexedCodePoints(),
    )

    // A leading surrogate pair occupies two chars, so the following code points start at indices 2 and 3.
    assertEquals(
        listOf(
            0 to 0x1F995,
            2 to 'a'.code,
            3 to 0x1F996,
        ),
        "\uD83E\uDD95a\uD83E\uDD96".gatherIndexedCodePoints(),
    )
}
164213
}

0 commit comments

Comments
 (0)