File tree 3 files changed +22
-3
lines changed
kotlin/com/xebia/functional/xef/auto
integrations/pdf/src/main/kotlin/com/xebia/functional/xef/pdf
3 files changed +22
-3
lines changed Original file line number Diff line number Diff line change @@ -2,14 +2,14 @@ package com.xebia.functional.xef.auto
2
2
3
3
import com.xebia.functional.xef.pdf.pdf
4
4
import kotlinx.serialization.Serializable
5
- import java.io.File
6
5
7
6
@Serializable
8
7
// Value holder pairing an `answer` string with a `source` string.
data class AIResponse(
  val answer: String,
  val source: String,
)
9
8
9
// URL of the PDF that main() passes to pdf(url = ...); kept free of surrounding whitespace.
const val pdfUrl = "https://people.cs.ksu.edu/~schmidt/705a/Scala/Programming-in-Scala.pdf"
10
+
10
11
suspend fun main () = ai {
11
- val file = AIResponse ::class .java.getResource(" /documents/doc.pdf" ).file
12
- contextScope(pdf(file = File (file))) {
12
+ contextScope(pdf(url = pdfUrl)) {
13
13
while (true ) {
14
14
print (" Enter your question: " )
15
15
val line = readlnOrNull() ? : break
Original file line number Diff line number Diff line change @@ -4,10 +4,29 @@ import com.xebia.functional.tokenizer.ModelType
4
4
import com.xebia.functional.xef.loaders.BaseLoader
5
5
import com.xebia.functional.xef.textsplitters.BaseTextSplitter
6
6
import com.xebia.functional.xef.textsplitters.TokenTextSplitter
7
+ import io.ktor.client.*
8
+ import io.ktor.client.request.*
9
+ import io.ktor.client.statement.*
10
+ import io.ktor.util.cio.*
11
+ import io.ktor.utils.io.*
7
12
import org.apache.pdfbox.pdmodel.PDDocument
8
13
import org.apache.pdfbox.text.PDFTextStripper
9
14
import java.io.File
10
15
16
/**
 * Downloads the document at [url] into a temporary file and delegates to the
 * [File]-based `pdf` overload to extract and split its text.
 *
 * The temporary file is deleted before this function returns, whether the
 * download and parsing succeed or throw.
 *
 * @param url address of the PDF to fetch with an HTTP GET.
 * @param splitter splitter forwarded unchanged to the file-based overload.
 * @return whatever the file-based overload returns for the downloaded file.
 */
suspend fun pdf(
  url: String,
  splitter: BaseTextSplitter = TokenTextSplitter(modelType = ModelType.GPT_3_5_TURBO, chunkSize = 100, chunkOverlap = 50)
): List<String> {
  val file = File.createTempFile("pdf", ".pdf")
  return try {
    // The client is only needed for the download, so close it before parsing starts.
    HttpClient().use { client ->
      val response = client.get(url)
      file.writeChannel().use {
        response.bodyAsChannel().copyAndClose(this)
      }
    }
    // NOTE(review): assumes the file-based overload has finished reading the file
    // by the time it returns its List<String> — confirm before relying on the delete below.
    pdf(file, splitter)
  } finally {
    // Previously the temp file was left behind on every call.
    file.delete()
  }
}
28
+
29
+
11
30
suspend fun pdf (
12
31
file : File ,
13
32
splitter : BaseTextSplitter = TokenTextSplitter (modelType = ModelType .GPT_3_5_TURBO , chunkSize = 100, chunkOverlap = 50)
You can’t perform that action at this time.
0 commit comments