Skip to content

Commit 6405f84

Browse files
raulraja and nomisRev authored
Remote PDF Loading (#75)
* Remote PDF Loading
* Remove unused import
* Update integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf/PDFLoader.kt

Co-authored-by: Simon Vergauwen <[email protected]>

* Fix merge from main

---------

Co-authored-by: Simon Vergauwen <[email protected]>
1 parent 0da8332 commit 6405f84

File tree

3 files changed

+22
-3
lines changed
  • example/src/main
  • integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf

3 files changed

+22
-3
lines changed

example/src/main/kotlin/com/xebia/functional/xef/auto/PDFDocument.kt

+3-3
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@ package com.xebia.functional.xef.auto
22

33
import com.xebia.functional.xef.pdf.pdf
44
import kotlinx.serialization.Serializable
5-
import java.io.File
65

76
/**
 * Serializable payload carrying two strings: an [answer] and a [source].
 *
 * NOTE(review): presumably [answer] is the model's reply and [source] the passage
 * it was drawn from — confirm against the prompt that produces this type.
 */
@Serializable
data class AIResponse(
  val answer: String,
  val source: String,
)
98

9+
/** Remote location of the PDF that the example's `main` passes to `pdf(url = ...)`. */
const val pdfUrl = "https://people.cs.ksu.edu/~schmidt/705a/Scala/Programming-in-Scala.pdf"
10+
1011
suspend fun main() = ai {
11-
val file = AIResponse::class.java.getResource("/documents/doc.pdf").file
12-
contextScope(pdf(file = File(file))) {
12+
contextScope(pdf(url = pdfUrl)) {
1313
while (true) {
1414
print("Enter your question: ")
1515
val line = readlnOrNull() ?: break
-3.45 MB
Binary file not shown.

integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf/PDFLoader.kt

+19
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,29 @@ import com.xebia.functional.tokenizer.ModelType
44
import com.xebia.functional.xef.loaders.BaseLoader
55
import com.xebia.functional.xef.textsplitters.BaseTextSplitter
66
import com.xebia.functional.xef.textsplitters.TokenTextSplitter
7+
import io.ktor.client.*
8+
import io.ktor.client.request.*
9+
import io.ktor.client.statement.*
10+
import io.ktor.util.cio.*
11+
import io.ktor.utils.io.*
712
import org.apache.pdfbox.pdmodel.PDDocument
813
import org.apache.pdfbox.text.PDFTextStripper
914
import java.io.File
1015

16+
/**
 * Fetches the PDF at [url], stores it in a temporary file and delegates to the
 * file-based `pdf(file, splitter)` overload to produce the text chunks.
 *
 * A fresh [HttpClient] is created for the call and closed before returning. The
 * HTTP status of the response is not inspected here; whatever body the server
 * sends is written to the temporary file and handed to the file-based loader.
 *
 * The temporary file is removed once loading finishes, whether it succeeds or
 * throws, so repeated calls do not accumulate downloaded PDFs in the temp directory.
 *
 * @param url address of the PDF document to download.
 * @param splitter strategy used to split the extracted text; defaults to a
 *   [TokenTextSplitter] for [ModelType.GPT_3_5_TURBO] with chunk size 100 and overlap 50.
 * @return the chunks returned by the file-based overload for the downloaded document.
 */
suspend fun pdf(
  url: String,
  splitter: BaseTextSplitter = TokenTextSplitter(modelType = ModelType.GPT_3_5_TURBO, chunkSize = 100, chunkOverlap = 50)
): List<String> =
  HttpClient().use { client ->
    val response = client.get(url)
    val file = File.createTempFile("pdf", ".pdf")
    try {
      // Stream the body straight to disk instead of buffering the whole PDF in memory.
      file.writeChannel().use {
        response.bodyAsChannel().copyAndClose(this)
      }
      pdf(file, splitter)
    } finally {
      // Best-effort cleanup: the download is only needed while the loader reads it.
      file.delete()
    }
  }
28+
29+
1130
suspend fun pdf(
1231
file: File,
1332
splitter: BaseTextSplitter = TokenTextSplitter(modelType = ModelType.GPT_3_5_TURBO, chunkSize = 100, chunkOverlap = 50)

0 commit comments

Comments
 (0)