Skip to content

Commit

Permalink
Port BaseLoader + BaseTextSplitter interfaces and TextLoader + Charac…
Browse files Browse the repository at this point in the history
…terTextSplitter impl. (#8)

* feat: create basic text loader and splitter

* feat: add JVM and native TextLoaders

* test: add TextLoader simple test

* test: add CharacterTextSplitter simple test

* style: add new lines at end of some files

* feat: apply suggestion of using nested use on Source and Buffer
  • Loading branch information
realdavidvega authored Apr 24, 2023
1 parent 462cb20 commit 6ee648a
Show file tree
Hide file tree
Showing 10 changed files with 207 additions and 8 deletions.
3 changes: 3 additions & 0 deletions src/commonMain/kotlin/com/xebia/functional/domain/Document.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package com.xebia.functional.domain

/**
 * A piece of textual content handled by loaders and text splitters.
 *
 * @property content the raw text held by this document.
 */
data class Document(
    val content: String
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.xebia.functional.loaders

import com.xebia.functional.domain.Document
import com.xebia.functional.textsplitters.BaseTextSplitter

/**
 * A source of [Document]s.
 */
interface BaseLoader {

    /** Loads the underlying content and returns it as a list of [Document]s. */
    suspend fun load(): List<Document>

    /**
     * Loads the underlying content and returns the [Document]s produced
     * with the help of the given [textSplitter].
     */
    suspend fun loadAndSplit(textSplitter: BaseTextSplitter): List<Document>
}
36 changes: 36 additions & 0 deletions src/commonMain/kotlin/com/xebia/functional/loaders/TextLoader.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.xebia.functional.loaders

import com.xebia.functional.domain.Document
import com.xebia.functional.textsplitters.BaseTextSplitter
import okio.FileSystem
import okio.Path

/**
 * Builds a [BaseLoader] that reads the file at [filePath] through [fileSystem],
 * producing one [Document] per line of the file.
 *
 * JVM and Native have overloads that use `FileSystem.SYSTEM`;
 * on NodeJS you need to pass `FileSystem.SYSTEM` manually.
 *
 * This function cannot currently be used in the browser, see:
 * - https://github.com/square/okio/issues/1070
 * - https://youtrack.jetbrains.com/issue/KT-47038
 */
suspend fun TextLoader(
    filePath: Path,
    fileSystem: FileSystem
): BaseLoader = object : BaseLoader {

    /** Reads the file line by line; each line becomes its own [Document]. */
    override suspend fun load(): List<Document> =
        buildList {
            fileSystem.read(filePath) {
                // readUtf8Line() yields null once the source is exhausted,
                // which is what terminates the sequence.
                generateSequence { readUtf8Line() }
                    .forEach { line -> add(Document(line)) }
            }
        }

    /** Loads every line as a [Document] and hands the result to [textSplitter]. */
    override suspend fun loadAndSplit(textSplitter: BaseTextSplitter): List<Document> {
        val loaded = load()
        return textSplitter.splitDocuments(documents = loaded)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ package com.xebia.functional.prompt
import arrow.core.raise.Raise
import okio.FileSystem
import okio.Path
import okio.buffer
import okio.use

fun Raise<InvalidTemplate>.PromptTemplate(
examples: List<String>,
Expand Down Expand Up @@ -38,12 +36,10 @@ suspend fun Raise<InvalidTemplate>.PromptTemplate(
variables: List<String>,
fileSystem: FileSystem
): PromptTemplate =
fileSystem.source(path).use { source ->
source.buffer().use { buffer ->
val template = buffer.readUtf8()
val config = Config(template, variables)
PromptTemplate(config)
}
fileSystem.read(path) {
val template = readUtf8()
val config = Config(template, variables)
PromptTemplate(config)
}

interface PromptTemplate {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.xebia.functional.textsplitters

import com.xebia.functional.domain.Document

/**
 * Splits text, or the content of [Document]s, into smaller pieces.
 */
interface BaseTextSplitter {

    /** Splits [text] into a list of strings. */
    suspend fun splitText(text: String): List<String>

    /** Splits the given [documents] and returns the resulting [Document]s. */
    suspend fun splitDocuments(documents: List<Document>): List<Document>

    /** Splits [text] and returns the pieces as [Document]s. */
    suspend fun splitTextInDocuments(text: String): List<Document>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.xebia.functional.textsplitters

import com.xebia.functional.domain.Document

/**
 * Creates a [BaseTextSplitter] that cuts text at every occurrence of [separator].
 *
 * All three operations share the same splitting rule (`String.split` on the
 * separator); they only differ in the shape of their input and output.
 */
suspend fun CharacterTextSplitter(
    separator: String
): BaseTextSplitter = object : BaseTextSplitter {

    /** Splits [text] on the separator and returns the raw pieces. */
    override suspend fun splitText(text: String): List<String> =
        text.split(separator)

    /** Splits the content of every document, keeping the original document order. */
    override suspend fun splitDocuments(documents: List<Document>): List<Document> =
        documents.flatMap { document -> splitTextInDocuments(document.content) }

    /** Splits [text] on the separator and wraps each piece in a [Document]. */
    override suspend fun splitTextInDocuments(text: String): List<Document> =
        splitText(text).map { piece -> Document(piece) }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package com.xebia.functional.loaders

import com.xebia.functional.domain.Document
import com.xebia.functional.textsplitters.CharacterTextSplitter
import io.kotest.core.spec.style.StringSpec
import io.kotest.matchers.shouldBe
import okio.Path.Companion.toPath
import okio.fakefilesystem.FakeFileSystem

/**
 * Exercises [TextLoader] against an in-memory [FakeFileSystem] that holds
 * a two-line text file at `templates/example.txt`.
 */
class TextLoaderSpec : StringSpec({
    "should return a list of documents with the contents of each line of the specified file" {
        val loader = TextLoader("templates/example.txt".toPath(), fileSystemWithExample())

        loader.load() shouldBe listOf(
            Document("Lorem ipsum dolor sit amet, consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt, ut labore et dolore magna aliqua.")
        )
    }

    "should return a list of documents with the contents of each trim of the specified file" {
        val loader = TextLoader("templates/example.txt".toPath(), fileSystemWithExample())
        val splitter = CharacterTextSplitter(", ")

        loader.loadAndSplit(splitter) shouldBe listOf(
            Document("Lorem ipsum dolor sit amet"),
            Document("consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt"),
            Document("ut labore et dolore magna aliqua.")
        )
    }
})

/** Builds a fresh in-memory file system containing `templates/example.txt` with two lines of text. */
private fun fileSystemWithExample(): FakeFileSystem {
    val fakeFs = FakeFileSystem()
    val directory = "templates".toPath()
    fakeFs.createDirectory(directory)
    fakeFs.write(directory / "example.txt") {
        writeUtf8(
            """
            |Lorem ipsum dolor sit amet, consectetur adipiscing elit.
            |Sed do eiusmod tempor incididunt, ut labore et dolore magna aliqua.
            """.trimMargin()
        )
    }
    return fakeFs
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package com.xebia.functional.textsplitters

import com.xebia.functional.domain.Document
import io.kotest.core.spec.style.StringSpec
import io.kotest.matchers.shouldBe

/**
 * Checks the three splitting operations of [CharacterTextSplitter] using `", "` as separator.
 */
class CharacterTextSplitterSpec : StringSpec({
    "should return a list of strings after split with a given separator" {
        val splitter = CharacterTextSplitter(", ")
        val input = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."

        val pieces = splitter.splitText(input)

        pieces shouldBe listOf(
            "Lorem ipsum dolor sit amet",
            "consectetur adipiscing elit."
        )
    }

    "should return a list of documents after split on a list of documents with a given separator" {
        val splitter = CharacterTextSplitter(", ")
        val input = listOf(
            Document("Lorem ipsum dolor sit amet, consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt, ut labore et dolore magna aliqua.")
        )

        val pieces = splitter.splitDocuments(input)

        pieces shouldBe listOf(
            Document("Lorem ipsum dolor sit amet"),
            Document("consectetur adipiscing elit."),
            Document("Sed do eiusmod tempor incididunt"),
            Document("ut labore et dolore magna aliqua.")
        )
    }

    "should return a list of documents after split on a text with a given separator" {
        val splitter = CharacterTextSplitter(", ")
        val input = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, ut labore et dolore magna aliqua."

        val pieces = splitter.splitTextInDocuments(input)

        pieces shouldBe listOf(
            Document("Lorem ipsum dolor sit amet"),
            Document("consectetur adipiscing elit"),
            Document("ut labore et dolore magna aliqua.")
        )
    }
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.xebia.functional.loaders

import okio.FileSystem
import okio.Path

/**
 * Convenience overload of [TextLoader] that reads [filePath] from `FileSystem.SYSTEM`.
 */
suspend fun TextLoader(
    filePath: Path
): BaseLoader =
    TextLoader(filePath = filePath, fileSystem = FileSystem.SYSTEM)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.xebia.functional.loaders

import okio.FileSystem
import okio.Path

/**
 * Convenience overload of [TextLoader] that reads [filePath] from `FileSystem.SYSTEM`.
 */
suspend fun TextLoader(
    filePath: Path
): BaseLoader =
    TextLoader(filePath = filePath, fileSystem = FileSystem.SYSTEM)

0 comments on commit 6ee648a

Please sign in to comment.