from langchain_core.embeddings import Embeddings
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.vectorstores import VectorStore
+ from quivr_core.rag.entities.models import ParsedRAGResponse
from langchain_openai import OpenAIEmbeddings
+ from quivr_core.rag.quivr_rag import QuivrQARAG
from rich.console import Console
from rich.panel import Panel
    LocalStorageConfig,
    TransparentStorageConfig,
)
- from quivr_core.chat import ChatHistory
- from quivr_core.config import RetrievalConfig
+ from quivr_core.rag.entities.chat import ChatHistory
+ from quivr_core.rag.entities.config import RetrievalConfig
from quivr_core.files.file import load_qfile
from quivr_core.llm import LLMEndpoint
- from quivr_core.models import (
+ from quivr_core.rag.entities.models import (
    ParsedRAGChunkResponse,
-     ParsedRAGResponse,
    QuivrKnowledge,
    SearchResult,
)
from quivr_core.processor.registry import get_processor_class
- from quivr_core.quivr_rag import QuivrQARAG
- from quivr_core.quivr_rag_langgraph import QuivrQARAGLangGraph
+ from quivr_core.rag.quivr_rag_langgraph import QuivrQARAGLangGraph
from quivr_core.storage.local_storage import LocalStorage, TransparentStorage
from quivr_core.storage.storage_base import StorageBase
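For downstream code, the net effect of this hunk is that the chat, config, and model entities now live under `quivr_core.rag`. A minimal import sketch, assuming the old paths are removed and nothing re-exports these names from their previous locations:

```python
# Old paths (removed in this commit):
# from quivr_core.chat import ChatHistory
# from quivr_core.config import RetrievalConfig
# from quivr_core.models import ParsedRAGResponse

# New paths after the quivr_core.rag reorganization:
from quivr_core.rag.entities.chat import ChatHistory
from quivr_core.rag.entities.config import RetrievalConfig
from quivr_core.rag.entities.models import ParsedRAGResponse
from quivr_core.rag.quivr_rag import QuivrQARAG
from quivr_core.rag.quivr_rag_langgraph import QuivrQARAGLangGraph
```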
@@ -49,19 +49,15 @@ async def process_files(
    """
    Process files in storage.
    This function takes a StorageBase and returns a list of Langchain documents.
-
    Args:
        storage (StorageBase): The storage containing the files to process.
        skip_file_error (bool): Whether to skip files that cannot be processed.
        processor_kwargs (dict[str, Any]): Additional arguments for the processor.
-
    Returns:
        list[Document]: List of processed documents in the Langchain Document format.
-
    Raises:
        ValueError: If a file cannot be processed and skip_file_error is False.
        Exception: If no processor is found for a file of a specific type and skip_file_error is False.
-
    """

    knowledge = []
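A minimal calling sketch for `process_files`, following the docstring above; the import path, the zero-argument `LocalStorage` constructor, and the optionality of `processor_kwargs` are assumptions here:

```python
import asyncio

from quivr_core.brain.brain import process_files  # path assumed from this file's location
from quivr_core.storage.local_storage import LocalStorage

async def main():
    storage = LocalStorage()  # assumed zero-arg constructor; files uploaded elsewhere
    # Returns the processed files as Langchain documents
    docs = await process_files(storage=storage, skip_file_error=True)
    print(f"processed {len(docs)} documents")

asyncio.run(main())
```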
@@ -91,40 +87,32 @@ class Brain:
class Brain:
    """
    A class representing a Brain.
-
    This class allows for the creation of a Brain, which is a collection of knowledge one wants to retrieve information from.
-
    A Brain is set up to:
-
    * Store files in the storage of your choice (local, S3, etc.)
    * Process the files in the storage to extract text and metadata in a wide range of formats.
    * Store the processed files in the vector store of your choice (FAISS, PGVector, etc.) - defaults to FAISS.
    * Create an index of the processed files.
    * Use the *Quivr* workflow for retrieval augmented generation.
-
    A Brain is able to:
-
    * Search for information in the vector store.
    * Answer questions about the knowledge in the Brain.
    * Stream the answer to the question.
-
    Attributes:
        name (str): The name of the brain.
        id (UUID): The unique identifier of the brain.
        storage (StorageBase): The storage used to store the files.
        llm (LLMEndpoint): The language model used to generate the answer.
        vector_db (VectorStore): The vector store used to store the processed files.
        embedder (Embeddings): The embeddings used to create the index of the processed files.
-
-
    """
    def __init__(
        self,
        *,
        name: str,
-        id: UUID,
        llm: LLMEndpoint,
+        id: UUID | None = None,
        vector_db: VectorStore | None = None,
        embedder: Embeddings | None = None,
        storage: StorageBase | None = None,
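Since `id` is now optional in the keyword-only constructor, callers no longer have to mint a UUID themselves. A sketch under two assumptions: an identifier is generated internally when `id` is omitted, and the remaining dependencies may stay at their `None` defaults until attached (`Brain` and `default_llm` refer to this module's own definitions):

```python
from uuid import uuid4

# Explicit id, as before:
brain_with_id = Brain(name="my-brain", llm=default_llm(), id=uuid4())

# id omitted: assumed to be generated inside __init__
brain = Brain(name="my-brain", llm=default_llm())
```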
@@ -156,19 +144,15 @@ def print_info(self):
    def load(cls, folder_path: str | Path) -> Self:
        """
        Load a brain from a folder path.
-
        Args:
            folder_path (str | Path): The path to the folder containing the brain.
-
        Returns:
            Brain: The brain loaded from the folder path.
-
        Example:
        ```python
        brain_loaded = Brain.load("path/to/brain")
        brain_loaded.print_info()
        ```
-
        """
        if isinstance(folder_path, str):
            folder_path = Path(folder_path)
@@ -217,16 +201,13 @@ def load(cls, folder_path: str | Path) -> Self:
            vector_db=vector_db,
        )

-    async def save(self, folder_path: str | Path) -> str:
+    async def save(self, folder_path: str | Path):
        """
        Save the brain to a folder path.
-
        Args:
            folder_path (str | Path): The path to the folder where the brain will be saved.
-
        Returns:
            str: The path to the folder where the brain was saved.
-
        Example:
        ```python
        await brain.save("path/to/brain")
@@ -324,10 +305,9 @@ async def afrom_files(
        embedder: Embeddings | None = None,
        skip_file_error: bool = False,
        processor_kwargs: dict[str, Any] | None = None,
-    ) -> Self:
+    ):
        """
        Create a brain from a list of file paths.
-
        Args:
            name (str): The name of the brain.
            file_paths (list[str | Path]): The list of file paths to add to the brain.
@@ -337,10 +317,8 @@ async def afrom_files(
            embedder (Embeddings | None): The embeddings used to create the index of the processed files.
            skip_file_error (bool): Whether to skip files that cannot be processed.
            processor_kwargs (dict[str, Any] | None): Additional arguments for the processor.
-
        Returns:
            Brain: The brain created from the file paths.
-
        Example:
        ```python
        brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
@@ -429,18 +407,15 @@ async def afrom_langchain_documents(
    ) -> Self:
        """
        Create a brain from a list of langchain documents.
-
        Args:
            name (str): The name of the brain.
            langchain_documents (list[Document]): The list of langchain documents to add to the brain.
            vector_db (VectorStore | None): The vector store used to store the processed files.
            storage (StorageBase): The storage used to store the files.
            llm (LLMEndpoint | None): The language model used to generate the answer.
            embedder (Embeddings | None): The embeddings used to create the index of the processed files.
-
        Returns:
            Brain: The brain created from the langchain documents.
-
        Example:
        ```python
        from langchain_core.documents import Document
@@ -449,6 +424,7 @@ async def afrom_langchain_documents(
        brain.print_info()
        ```
        """
+
        if llm is None:
            llm = default_llm()
@@ -481,16 +457,13 @@ async def asearch(
    ) -> list[SearchResult]:
        """
        Search for relevant documents in the brain based on a query.
-
        Args:
            query (str | Document): The query to search for.
            n_results (int): The number of results to return.
            filter (Callable | Dict[str, Any] | None): The filter to apply to the search.
            fetch_n_neighbors (int): The number of neighbors to fetch.
-
        Returns:
            list[SearchResult]: The list of retrieved chunks.
-
        Example:
        ```python
        brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
@@ -517,57 +490,6 @@ def add_file(self) -> None:
        # add it to vectorstore
        raise NotImplementedError

-    def ask(
-        self,
-        question: str,
-        retrieval_config: RetrievalConfig | None = None,
-        rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
-        list_files: list[QuivrKnowledge] | None = None,
-        chat_history: ChatHistory | None = None,
-    ) -> ParsedRAGResponse:
-        """
-        Ask a question to the brain and get a generated answer.
-
-        Args:
-            question (str): The question to ask.
-            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
-            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
-            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
-            chat_history (ChatHistory | None): The chat history to use.
-
-        Returns:
-            ParsedRAGResponse: The generated answer.
-
-        Example:
-        ```python
-        brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
-        answer = brain.ask("What is the meaning of life?")
-        print(answer.answer)
-        ```
-        """
-        async def collect_streamed_response():
-            full_answer = ""
-            async for response in self.ask_streaming(
-                question=question,
-                retrieval_config=retrieval_config,
-                rag_pipeline=rag_pipeline,
-                list_files=list_files,
-                chat_history=chat_history
-            ):
-                full_answer += response.answer
-            return full_answer
-
-        # Run the async function in the event loop
-        loop = asyncio.get_event_loop()
-        full_answer = loop.run_until_complete(collect_streamed_response())
-
-        chat_history = self.default_chat if chat_history is None else chat_history
-        chat_history.append(HumanMessage(content=question))
-        chat_history.append(AIMessage(content=full_answer))
-
-        # Return the final response
-        return ParsedRAGResponse(answer=full_answer)
-
    async def ask_streaming(
        self,
        question: str,
@@ -578,24 +500,20 @@ async def ask_streaming(
    ) -> AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]:
        """
        Ask a question to the brain and get a streamed generated answer.
-
        Args:
            question (str): The question to ask.
            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
-            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
            chat_history (ChatHistory | None): The chat history to use.
-
        Returns:
            AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]: The streamed generated answer.
-
        Example:
        ```python
        brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
        async for chunk in brain.ask_streaming("What is the meaning of life?"):
            print(chunk.answer)
        ```
-
        """
        llm = self.llm
@@ -630,3 +548,64 @@ async def ask_streaming(
            chat_history.append(AIMessage(content=full_answer))
            yield response

+    async def aask(
+        self,
+        question: str,
+        retrieval_config: RetrievalConfig | None = None,
+        rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
+        list_files: list[QuivrKnowledge] | None = None,
+        chat_history: ChatHistory | None = None,
+    ) -> ParsedRAGResponse:
+        """
+        Asynchronous version that asks a question to the brain and gets a generated answer.
+        Args:
+            question (str): The question to ask.
+            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            chat_history (ChatHistory | None): The chat history to use.
+        Returns:
+            ParsedRAGResponse: The generated answer.
+        """
+        full_answer = ""
+
+        async for response in self.ask_streaming(
+            question=question,
+            retrieval_config=retrieval_config,
+            rag_pipeline=rag_pipeline,
+            list_files=list_files,
+            chat_history=chat_history,
+        ):
+            full_answer += response.answer
+
+        return ParsedRAGResponse(answer=full_answer)
+
+    def ask(
+        self,
+        question: str,
+        retrieval_config: RetrievalConfig | None = None,
+        rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
+        list_files: list[QuivrKnowledge] | None = None,
+        chat_history: ChatHistory | None = None,
+    ) -> ParsedRAGResponse:
+        """
+        Fully synchronous version that asks a question to the brain and gets a generated answer.
+        Args:
+            question (str): The question to ask.
+            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            chat_history (ChatHistory | None): The chat history to use.
+        Returns:
+            ParsedRAGResponse: The generated answer.
+        """
+        loop = asyncio.get_event_loop()
+        return loop.run_until_complete(
+            self.aask(
+                question=question,
+                retrieval_config=retrieval_config,
+                rag_pipeline=rag_pipeline,
+                list_files=list_files,
+                chat_history=chat_history,
+            )
+        )
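The removed synchronous `ask` is reintroduced here as a pair of thin wrappers: `aask` collects the `ask_streaming` chunks into a single `ParsedRAGResponse`, and `ask` drives `aask` on the event loop. A minimal usage sketch, assuming `Brain` is re-exported at the package root and `file1.pdf` exists:

```python
import asyncio

from quivr_core import Brain  # assumed package-root re-export

async def main():
    brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf"])
    # Async path: collects streamed chunks into one ParsedRAGResponse
    response = await brain.aask("What is the meaning of life?")
    print(response.answer)

asyncio.run(main())

# Outside a running event loop, the synchronous wrapper is equivalent:
# response = brain.ask("What is the meaning of life?")
```

Because `ask` relies on `asyncio.get_event_loop().run_until_complete`, it will fail if an event loop is already running (for example, in a Jupyter cell); use `aask` in async contexts.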