-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Port BaseLoader + BaseTextSplitter interfaces and TextLoader + CharacterTextSplitter impl. (#8)
* feat: create basic text loader and splitter
* feat: add JVM and native TextLoaders
* test: add TextLoader simple test
* test: add CharacterTextSplitter simple test
* style: add new lines at end of some files
* feat: apply suggestion of using nested use on Source and Buffer
- Loading branch information
1 parent
462cb20
commit 6ee648a
Showing
10 changed files
with
207 additions
and
8 deletions.
There are no files selected for viewing
3 changes: 3 additions & 0 deletions
3
src/commonMain/kotlin/com/xebia/functional/domain/Document.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
package com.xebia.functional.domain | ||
|
||
/**
 * A piece of text passed between loaders and text splitters.
 *
 * @property content the text held by this document.
 */
data class Document(
    val content: String,
)
9 changes: 9 additions & 0 deletions
9
src/commonMain/kotlin/com/xebia/functional/loaders/BaseLoader.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package com.xebia.functional.loaders | ||
|
||
import com.xebia.functional.domain.Document | ||
import com.xebia.functional.textsplitters.BaseTextSplitter | ||
|
||
/**
 * A source of [Document]s.
 */
interface BaseLoader {

    /**
     * Loads the documents provided by this loader.
     *
     * @return the loaded documents.
     */
    suspend fun load(): List<Document>

    /**
     * Loads the documents and runs them through [textSplitter].
     *
     * The default implementation passes the result of [load] to
     * [BaseTextSplitter.splitDocuments], so implementations only have to
     * provide [load]; they may still override this function.
     *
     * @param textSplitter the splitter applied to the loaded documents.
     * @return the documents returned by [textSplitter].
     */
    suspend fun loadAndSplit(textSplitter: BaseTextSplitter): List<Document> =
        textSplitter.splitDocuments(documents = load())
}
36 changes: 36 additions & 0 deletions
36
src/commonMain/kotlin/com/xebia/functional/loaders/TextLoader.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package com.xebia.functional.loaders | ||
|
||
import com.xebia.functional.domain.Document | ||
import com.xebia.functional.textsplitters.BaseTextSplitter | ||
import okio.FileSystem | ||
import okio.Path | ||
|
||
/**
 * Builds a [BaseLoader] that reads the file at [filePath] through [fileSystem].
 *
 * JVM & Native have overloads that use FileSystem.SYSTEM,
 * on NodeJs FileSystem.SYSTEM has to be passed manually.
 *
 * This function can currently not be used on the browser, see:
 * - https://github.com/square/okio/issues/1070
 * - https://youtrack.jetbrains.com/issue/KT-47038
 */
suspend fun TextLoader(
    filePath: Path,
    fileSystem: FileSystem
): BaseLoader = object : BaseLoader {

    /** Reads the file as UTF-8 lines and returns one [Document] per line, in file order. */
    override suspend fun load(): List<Document> =
        fileSystem.read(filePath) {
            // The sequence is lazy, so it is turned into a list before `read` returns.
            generateSequence { readUtf8Line() }
                .map(::Document)
                .toList()
        }

    /** Loads the documents and hands them to [textSplitter]. */
    override suspend fun loadAndSplit(textSplitter: BaseTextSplitter): List<Document> {
        val loaded = load()
        return textSplitter.splitDocuments(loaded)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
src/commonMain/kotlin/com/xebia/functional/textsplitters/BaseTextSplitter.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package com.xebia.functional.textsplitters | ||
|
||
import com.xebia.functional.domain.Document | ||
|
||
/**
 * Splits text, or the content of [Document]s, into smaller pieces.
 *
 * Only [splitText] has to be implemented; the [Document]-based functions
 * have default implementations built on top of it and may be overridden.
 */
interface BaseTextSplitter {

    /**
     * Splits [text] into pieces.
     *
     * @return the pieces, in the order chosen by the implementation.
     */
    suspend fun splitText(text: String): List<String>

    /**
     * Splits the content of every document in [documents].
     *
     * The default implementation applies [splitText] to each document's
     * content and wraps every piece in a new [Document], keeping input order.
     */
    suspend fun splitDocuments(documents: List<Document>): List<Document> =
        documents.flatMap { document -> splitTextInDocuments(document.content) }

    /**
     * Splits [text] and wraps each piece in a [Document].
     *
     * The default implementation maps the result of [splitText] to documents.
     */
    suspend fun splitTextInDocuments(text: String): List<Document> =
        splitText(text).map { piece -> Document(piece) }
}
17 changes: 17 additions & 0 deletions
17
src/commonMain/kotlin/com/xebia/functional/textsplitters/CharacterTextSplitter.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package com.xebia.functional.textsplitters | ||
|
||
import com.xebia.functional.domain.Document | ||
|
||
/**
 * Builds a [BaseTextSplitter] that splits text around occurrences of [separator].
 *
 * @param separator the delimiter passed to [String.split].
 */
suspend fun CharacterTextSplitter(
    separator: String
): BaseTextSplitter = object : BaseTextSplitter {

    /** Splits [text] around [separator]. */
    override suspend fun splitText(text: String): List<String> {
        val pieces = text.split(separator)
        return pieces
    }

    /** Splits the content of each document, keeping the order of [documents]. */
    override suspend fun splitDocuments(documents: List<Document>): List<Document> =
        documents.flatMap { document -> splitTextInDocuments(document.content) }

    /** Splits [text] and wraps every piece in a [Document]. */
    override suspend fun splitTextInDocuments(text: String): List<Document> =
        splitText(text).map { piece -> Document(piece) }
}
59 changes: 59 additions & 0 deletions
59
src/commonTest/kotlin/com/xebia/functional/loaders/TextLoaderSpec.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package com.xebia.functional.loaders | ||
|
||
import com.xebia.functional.domain.Document | ||
import com.xebia.functional.textsplitters.CharacterTextSplitter | ||
import io.kotest.core.spec.style.StringSpec | ||
import io.kotest.matchers.shouldBe | ||
import okio.Path.Companion.toPath | ||
import okio.fakefilesystem.FakeFileSystem | ||
|
||
/**
 * Checks that TextLoader turns each line of a file into a Document, with and
 * without a text splitter applied afterwards.
 */
class TextLoaderSpec : StringSpec({
    val examplePath = "templates/example.txt".toPath()

    // Builds a fresh in-memory file system containing the two-line example file.
    fun exampleFileSystem(): FakeFileSystem {
        val fakeFs = FakeFileSystem()
        val directory = "templates".toPath()
        fakeFs.createDirectory(directory)
        fakeFs.write(directory / "example.txt") {
            writeUtf8(
                """
                |Lorem ipsum dolor sit amet, consectetur adipiscing elit.
                |Sed do eiusmod tempor incididunt, ut labore et dolore magna aliqua.
                """.trimMargin()
            )
        }
        return fakeFs
    }

    "should return a list of documents with the contents of each line of the specified file" {
        val loader = TextLoader(examplePath, exampleFileSystem())

        val loaded = loader.load()

        loaded shouldBe listOf(
            Document("Lorem ipsum dolor sit amet, consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt, ut labore et dolore magna aliqua.")
        )
    }

    "should return a list of documents with the contents of each trim of the specified file" {
        val loader = TextLoader(examplePath, exampleFileSystem())
        val splitter = CharacterTextSplitter(", ")

        val loaded = loader.loadAndSplit(splitter)

        loaded shouldBe listOf(
            Document("Lorem ipsum dolor sit amet"),
            Document("consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt"),
            Document("ut labore et dolore magna aliqua.")
        )
    }
})
52 changes: 52 additions & 0 deletions
52
src/commonTest/kotlin/com/xebia/functional/textsplitters/CharacterTextSplitterSpec.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package com.xebia.functional.textsplitters | ||
|
||
import com.xebia.functional.domain.Document | ||
import io.kotest.core.spec.style.StringSpec | ||
import io.kotest.matchers.shouldBe | ||
|
||
/**
 * Checks the three splitting functions of CharacterTextSplitter against a
 * comma-plus-space separator.
 */
class CharacterTextSplitterSpec : StringSpec({
    // Every case below splits on the same separator.
    val commaSeparator = ", "

    "should return a list of strings after split with a given separator" {
        val splitter = CharacterTextSplitter(commaSeparator)

        val pieces = splitter.splitText("Lorem ipsum dolor sit amet, consectetur adipiscing elit.")

        pieces shouldBe listOf(
            "Lorem ipsum dolor sit amet",
            "consectetur adipiscing elit."
        )
    }

    "should return a list of documents after split on a list of documents with a given separator" {
        val splitter = CharacterTextSplitter(commaSeparator)
        val input = listOf(
            Document("Lorem ipsum dolor sit amet, consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt, ut labore et dolore magna aliqua.")
        )

        val pieces = splitter.splitDocuments(input)

        pieces shouldBe listOf(
            Document("Lorem ipsum dolor sit amet"),
            Document("consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt"),
            Document("ut labore et dolore magna aliqua.")
        )
    }

    "should return a list of documents after split on a text with a given separator" {
        val splitter = CharacterTextSplitter(commaSeparator)

        val pieces = splitter.splitTextInDocuments(
            "Lorem ipsum dolor sit amet, consectetur adipiscing elit, ut labore et dolore magna aliqua."
        )

        pieces shouldBe listOf(
            Document("Lorem ipsum dolor sit amet"),
            Document("consectetur adipiscing elit"),
            Document("ut labore et dolore magna aliqua.")
        )
    }
})
9 changes: 9 additions & 0 deletions
9
src/jvmMain/kotlin/com/xebia/functional/loaders/TextLoaderJVM.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package com.xebia.functional.loaders | ||
|
||
import okio.FileSystem | ||
import okio.Path | ||
|
||
/**
 * JVM overload of TextLoader that reads [filePath] through [FileSystem.SYSTEM].
 *
 * @param filePath the file to load.
 * @return the loader produced by the two-argument TextLoader.
 */
suspend fun TextLoader(
    filePath: Path
): BaseLoader {
    val systemFileSystem = FileSystem.SYSTEM
    return TextLoader(filePath = filePath, fileSystem = systemFileSystem)
}
9 changes: 9 additions & 0 deletions
9
src/nativeMain/kotlin/com/xebia/functional/loaders/TextLoader.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package com.xebia.functional.loaders | ||
|
||
import okio.FileSystem | ||
import okio.Path | ||
|
||
/**
 * Native overload of TextLoader that reads [filePath] through [FileSystem.SYSTEM].
 *
 * @param filePath the file to load.
 * @return the loader produced by the two-argument TextLoader.
 */
suspend fun TextLoader(
    filePath: Path
): BaseLoader {
    val systemFileSystem = FileSystem.SYSTEM
    return TextLoader(filePath = filePath, fileSystem = systemFileSystem)
}