From a369ba60081eb97cf75be290e7f59961ed2335f8 Mon Sep 17 00:00:00 2001 From: Simon Vergauwen Date: Mon, 22 May 2023 12:35:16 +0200 Subject: [PATCH] Move TokenTextSplitter to commonMain --- .../kotlin/com/xebia/functional/xef/loaders/BaseLoader.kt | 4 ++-- .../functional/xef/textsplitters/CharacterTextSplitter.kt | 4 ++-- .../textsplitters/{BaseTextSplitter.kt => TextSplitter.kt} | 2 +- .../xebia/functional/xef/textsplitters/TokenTextSplitter.kt | 4 ++-- .../kotlin/com/xebia/functional/xef/agents/BingSearch.kt | 4 ++-- .../com/xebia/functional/xef/agents/ScrapeUrlContent.kt | 4 ++-- .../kotlin/com/xebia/functional/xef/loaders/TextLoader.kt | 1 - .../main/kotlin/com/xebia/functional/xef/pdf/PDFLoader.kt | 6 +++--- 8 files changed, 14 insertions(+), 15 deletions(-) rename core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/{BaseTextSplitter.kt => TextSplitter.kt} (88%) rename core/src/{jvmMain => commonMain}/kotlin/com/xebia/functional/xef/textsplitters/TokenTextSplitter.kt (94%) diff --git a/core/src/commonMain/kotlin/com/xebia/functional/xef/loaders/BaseLoader.kt b/core/src/commonMain/kotlin/com/xebia/functional/xef/loaders/BaseLoader.kt index b13d63f36..7c244c725 100644 --- a/core/src/commonMain/kotlin/com/xebia/functional/xef/loaders/BaseLoader.kt +++ b/core/src/commonMain/kotlin/com/xebia/functional/xef/loaders/BaseLoader.kt @@ -1,9 +1,9 @@ package com.xebia.functional.xef.loaders -import com.xebia.functional.xef.textsplitters.BaseTextSplitter +import com.xebia.functional.xef.textsplitters.TextSplitter interface BaseLoader { suspend fun load(): List - suspend fun loadAndSplit(textSplitter: BaseTextSplitter): List = + suspend fun loadAndSplit(textSplitter: TextSplitter): List = textSplitter.splitDocuments(documents = load()) } diff --git a/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/CharacterTextSplitter.kt b/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/CharacterTextSplitter.kt index 17e9318d1..b316f8530 100644 --- a/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/CharacterTextSplitter.kt +++ b/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/CharacterTextSplitter.kt @@ -1,7 +1,7 @@ package com.xebia.functional.xef.textsplitters -fun CharacterTextSplitter(separator: String): BaseTextSplitter = - object : BaseTextSplitter { +fun CharacterTextSplitter(separator: String): TextSplitter = + object : TextSplitter { override suspend fun splitText(text: String): List = text.split(separator) diff --git a/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/BaseTextSplitter.kt b/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/TextSplitter.kt similarity index 88% rename from core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/BaseTextSplitter.kt rename to core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/TextSplitter.kt index cd20f76fc..a93536d43 100644 --- a/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/BaseTextSplitter.kt +++ b/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/TextSplitter.kt @@ -1,6 +1,6 @@ package com.xebia.functional.xef.textsplitters -interface BaseTextSplitter { +interface TextSplitter { suspend fun splitText(text: String): List suspend fun splitDocuments(documents: List): List suspend fun splitTextInDocuments(text: String): List diff --git a/core/src/jvmMain/kotlin/com/xebia/functional/xef/textsplitters/TokenTextSplitter.kt b/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/TokenTextSplitter.kt similarity index 94% rename from core/src/jvmMain/kotlin/com/xebia/functional/xef/textsplitters/TokenTextSplitter.kt rename to core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/TokenTextSplitter.kt index 0fd778448..788cbe016 100644 --- a/core/src/jvmMain/kotlin/com/xebia/functional/xef/textsplitters/TokenTextSplitter.kt +++ b/core/src/commonMain/kotlin/com/xebia/functional/xef/textsplitters/TokenTextSplitter.kt @@ -3,14 +3,14 @@ package com.xebia.functional.xef.textsplitters import com.xebia.functional.tokenizer.Encoding import com.xebia.functional.tokenizer.ModelType -fun TokenTextSplitter(modelType: ModelType, chunkSize: Int, chunkOverlap: Int): BaseTextSplitter = +fun TokenTextSplitter(modelType: ModelType, chunkSize: Int, chunkOverlap: Int): TextSplitter = TokenTextSplitterImpl(modelType.encoding, chunkSize, chunkOverlap) private class TokenTextSplitterImpl( private val tokenizer: Encoding, private val chunkSize: Int, private val chunkOverlap: Int -) : BaseTextSplitter { +) : TextSplitter { override suspend fun splitText(text: String): List { val inputIds = tokenizer.encode(text) diff --git a/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/BingSearch.kt b/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/BingSearch.kt index 4f60dd16a..21ccf2545 100644 --- a/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/BingSearch.kt +++ b/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/BingSearch.kt @@ -4,7 +4,7 @@ import arrow.core.flatten import arrow.fx.coroutines.parMap import com.apptasticsoftware.rssreader.Item import com.apptasticsoftware.rssreader.RssReader -import com.xebia.functional.xef.textsplitters.BaseTextSplitter +import com.xebia.functional.xef.textsplitters.TextSplitter import io.ktor.http.* import java.util.stream.Collectors import kotlin.jvm.optionals.toList @@ -12,7 +12,7 @@ import kotlinx.coroutines.Dispatchers suspend fun bingSearch( search: String, - splitter: BaseTextSplitter, + splitter: TextSplitter, url: String = "https://www.bing.com/news/search?q=${search.encodeURLParameter()}&format=rss", maxLinks: Int = 10 ): List { diff --git a/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/ScrapeUrlContent.kt b/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/ScrapeUrlContent.kt index 5135ec528..f1a97ba4d 100644 --- a/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/ScrapeUrlContent.kt +++ b/core/src/jvmMain/kotlin/com/xebia/functional/xef/agents/ScrapeUrlContent.kt @@ -1,7 +1,7 @@ package com.xebia.functional.xef.agents import com.xebia.functional.xef.loaders.ScrapeURLTextLoader -import com.xebia.functional.xef.textsplitters.BaseTextSplitter +import com.xebia.functional.xef.textsplitters.TextSplitter -suspend fun scrapeUrlContent(url: String, splitter: BaseTextSplitter): List = +suspend fun scrapeUrlContent(url: String, splitter: TextSplitter): List = ScrapeURLTextLoader(url).loadAndSplit(splitter) diff --git a/filesystem/src/commonMain/kotlin/com/xebia/functional/xef/loaders/TextLoader.kt b/filesystem/src/commonMain/kotlin/com/xebia/functional/xef/loaders/TextLoader.kt index ad3ff5a8c..96cbb939b 100644 --- a/filesystem/src/commonMain/kotlin/com/xebia/functional/xef/loaders/TextLoader.kt +++ b/filesystem/src/commonMain/kotlin/com/xebia/functional/xef/loaders/TextLoader.kt @@ -1,7 +1,6 @@ package com.xebia.functional.xef.loaders import com.xebia.functional.xef.io.DEFAULT -import com.xebia.functional.xef.textsplitters.BaseTextSplitter import okio.FileSystem import okio.Path diff --git a/integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf/PDFLoader.kt b/integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf/PDFLoader.kt index d39efd001..fe6ac9764 100644 --- a/integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf/PDFLoader.kt +++ b/integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf/PDFLoader.kt @@ -2,7 +2,7 @@ package com.xebia.functional.xef.pdf import com.xebia.functional.tokenizer.ModelType import com.xebia.functional.xef.loaders.BaseLoader -import com.xebia.functional.xef.textsplitters.BaseTextSplitter +import com.xebia.functional.xef.textsplitters.TextSplitter import com.xebia.functional.xef.textsplitters.TokenTextSplitter import io.ktor.client.* import io.ktor.client.request.* @@ -15,7 +15,7 @@ import java.io.File suspend fun pdf( url: String, - splitter: BaseTextSplitter = TokenTextSplitter(modelType = ModelType.GPT_3_5_TURBO, chunkSize = 100, chunkOverlap = 50) + splitter: TextSplitter = TokenTextSplitter(modelType = ModelType.GPT_3_5_TURBO, chunkSize = 100, chunkOverlap = 50) ): List = HttpClient().use { val response = it.get(url) @@ -29,7 +29,7 @@ suspend fun pdf( suspend fun pdf( file: File, - splitter: BaseTextSplitter = TokenTextSplitter(modelType = ModelType.GPT_3_5_TURBO, chunkSize = 100, chunkOverlap = 50) + splitter: TextSplitter = TokenTextSplitter(modelType = ModelType.GPT_3_5_TURBO, chunkSize = 100, chunkOverlap = 50) ): List { val loader = PDFLoader(file) return loader.loadAndSplit(splitter)