
Commit 285fe5b

feat: websearch, tool use, user intent, dynamic retrieval, multiple questions (#3424)

# Description

This PR includes far too many new features:

- detection of user intent (closes CORE-211)
- treating multiple questions in parallel (closes CORE-212)
- using the chat history when answering a question (closes CORE-213)
- filtering of retrieved chunks by relevance threshold (closes CORE-217)
- dynamic retrieval of chunks (closes CORE-218)
- enabling web search via Tavily (closes CORE-220)
- enabling the agent / assistant to activate tools when relevant to complete the user task (closes CORE-224)

Also closes CORE-205

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):

---------

Co-authored-by: Stan Girard <[email protected]>
1 parent 5401c01 commit 285fe5b
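Among the features listed above, the Tavily web search is the easiest to illustrate in isolation. The sketch below uses the standard langchain-community Tavily tool; how this commit actually wires the tool into the quivr-core LangGraph workflow is not visible in the diff excerpts, so the wiring here is an assumption.

```python
# Minimal sketch of a Tavily-backed web search via langchain-community.
# Assumes TAVILY_API_KEY is set in the environment; how quivr-core
# registers this tool inside its LangGraph workflow is an assumption.
from langchain_community.tools.tavily_search import TavilySearchResults

web_search = TavilySearchResults(max_results=3)
results = web_search.invoke("latest LangGraph release notes")
for result in results:
    print(result["url"], "-", result["content"][:80])
```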


43 files changed: +2165 -1452 lines changed

.github/workflows/backend-core-tests.yml

-2
```diff
@@ -41,6 +41,4 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y libmagic-dev poppler-utils libreoffice tesseract-ocr pandoc
           cd core
-          rye run python -c "from unstructured.nlp.tokenize import download_nltk_packages; download_nltk_packages()"
-          rye run python -c "import nltk;nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')"
           rye test -p quivr-core
```

core/pyproject.toml

+2 -1

```diff
@@ -9,7 +9,7 @@ dependencies = [
     "pydantic>=2.8.2",
     "langchain-core>=0.2.38",
     "langchain>=0.2.14,<0.3.0",
-    "langgraph>=0.2.14",
+    "langgraph>=0.2.38",
     "httpx>=0.27.0",
     "rich>=13.7.1",
     "tiktoken>=0.7.0",
@@ -21,6 +21,7 @@ dependencies = [
     "types-pyyaml>=6.0.12.20240808",
     "transformers[sentencepiece]>=4.44.2",
     "faiss-cpu>=1.8.0.post1",
+    "rapidfuzz>=3.10.1",
 ]
 readme = "README.md"
 requires-python = ">= 3.11"
```
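The langgraph bump tracks the new graph-based workflow, and the new rapidfuzz dependency plausibly backs the relevance-threshold filtering mentioned in the commit message, though its exact call site is not visible in this diff. A minimal sketch of the kind of fuzzy scoring rapidfuzz provides (the threshold value and the chunk-filtering use are assumptions):

```python
# Illustrative only: fuzzy relevance scoring with rapidfuzz. The 70-point
# threshold and its use for chunk filtering in quivr-core are assumptions.
from rapidfuzz import fuzz, utils

chunks = ["Dynamic retrieval of chunks", "Unrelated notes about cooking"]
query = "dynamic chunk retrieval"
relevant = [
    c for c in chunks
    if fuzz.token_set_ratio(query, c, processor=utils.default_process) >= 70
]
print(relevant)  # ['Dynamic retrieval of chunks']
```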

core/quivr_core/brain/brain.py

+72 -93

````diff
@@ -10,7 +10,9 @@
 from langchain_core.embeddings import Embeddings
 from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.vectorstores import VectorStore
+from quivr_core.rag.entities.models import ParsedRAGResponse
 from langchain_openai import OpenAIEmbeddings
+from quivr_core.rag.quivr_rag import QuivrQARAG
 from rich.console import Console
 from rich.panel import Panel

@@ -22,19 +24,17 @@
     LocalStorageConfig,
     TransparentStorageConfig,
 )
-from quivr_core.chat import ChatHistory
-from quivr_core.config import RetrievalConfig
+from quivr_core.rag.entities.chat import ChatHistory
+from quivr_core.rag.entities.config import RetrievalConfig
 from quivr_core.files.file import load_qfile
 from quivr_core.llm import LLMEndpoint
-from quivr_core.models import (
+from quivr_core.rag.entities.models import (
     ParsedRAGChunkResponse,
-    ParsedRAGResponse,
     QuivrKnowledge,
     SearchResult,
 )
 from quivr_core.processor.registry import get_processor_class
-from quivr_core.quivr_rag import QuivrQARAG
-from quivr_core.quivr_rag_langgraph import QuivrQARAGLangGraph
+from quivr_core.rag.quivr_rag_langgraph import QuivrQARAGLangGraph
 from quivr_core.storage.local_storage import LocalStorage, TransparentStorage
 from quivr_core.storage.storage_base import StorageBase

@@ -49,19 +49,15 @@ async def process_files(
     """
     Process files in storage.
     This function takes a StorageBase and return a list of langchain documents.
-
     Args:
         storage (StorageBase): The storage containing the files to process.
         skip_file_error (bool): Whether to skip files that cannot be processed.
         processor_kwargs (dict[str, Any]): Additional arguments for the processor.
-
     Returns:
         list[Document]: List of processed documents in the Langchain Document format.
-
     Raises:
         ValueError: If a file cannot be processed and skip_file_error is False.
         Exception: If no processor is found for a file of a specific type and skip_file_error is False.
-
     """

     knowledge = []
@@ -91,40 +87,32 @@ async def process_files(
 class Brain:
     """
     A class representing a Brain.
-
     This class allows for the creation of a Brain, which is a collection of knowledge one wants to retrieve information from.
-
     A Brain is set to:
-
     * Store files in the storage of your choice (local, S3, etc.)
     * Process the files in the storage to extract text and metadata in a wide range of format.
     * Store the processed files in the vector store of your choice (FAISS, PGVector, etc.) - default to FAISS.
     * Create an index of the processed files.
     * Use the *Quivr* workflow for the retrieval augmented generation.
-
     A Brain is able to:
-
     * Search for information in the vector store.
     * Answer questions about the knowledges in the Brain.
     * Stream the answer to the question.
-
     Attributes:
         name (str): The name of the brain.
         id (UUID): The unique identifier of the brain.
         storage (StorageBase): The storage used to store the files.
         llm (LLMEndpoint): The language model used to generate the answer.
         vector_db (VectorStore): The vector store used to store the processed files.
         embedder (Embeddings): The embeddings used to create the index of the processed files.
-
-
     """

     def __init__(
         self,
         *,
         name: str,
-        id: UUID,
         llm: LLMEndpoint,
+        id: UUID | None = None,
         vector_db: VectorStore | None = None,
         embedder: Embeddings | None = None,
         storage: StorageBase | None = None,
@@ -156,19 +144,15 @@ def print_info(self):
     def load(cls, folder_path: str | Path) -> Self:
         """
         Load a brain from a folder path.
-
         Args:
             folder_path (str | Path): The path to the folder containing the brain.
-
         Returns:
             Brain: The brain loaded from the folder path.
-
         Example:
         ```python
         brain_loaded = Brain.load("path/to/brain")
         brain_loaded.print_info()
         ```
-
         """
         if isinstance(folder_path, str):
             folder_path = Path(folder_path)
@@ -217,16 +201,13 @@ def load(cls, folder_path: str | Path) -> Self:
             vector_db=vector_db,
         )

-    async def save(self, folder_path: str | Path) -> str:
+    async def save(self, folder_path: str | Path):
         """
         Save the brain to a folder path.
-
         Args:
             folder_path (str | Path): The path to the folder where the brain will be saved.
-
         Returns:
             str: The path to the folder where the brain was saved.
-
         Example:
         ```python
         await brain.save("path/to/brain")
@@ -324,10 +305,9 @@ async def afrom_files(
         embedder: Embeddings | None = None,
         skip_file_error: bool = False,
         processor_kwargs: dict[str, Any] | None = None,
-    ) -> Self:
+    ):
         """
         Create a brain from a list of file paths.
-
         Args:
             name (str): The name of the brain.
             file_paths (list[str | Path]): The list of file paths to add to the brain.
@@ -337,10 +317,8 @@ async def afrom_files(
             embedder (Embeddings | None): The embeddings used to create the index of the processed files.
             skip_file_error (bool): Whether to skip files that cannot be processed.
             processor_kwargs (dict[str, Any] | None): Additional arguments for the processor.
-
         Returns:
             Brain: The brain created from the file paths.
-
         Example:
         ```python
         brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
@@ -429,18 +407,15 @@ async def afrom_langchain_documents(
     ) -> Self:
         """
         Create a brain from a list of langchain documents.
-
         Args:
             name (str): The name of the brain.
             langchain_documents (list[Document]): The list of langchain documents to add to the brain.
             vector_db (VectorStore | None): The vector store used to store the processed files.
             storage (StorageBase): The storage used to store the files.
             llm (LLMEndpoint | None): The language model used to generate the answer.
             embedder (Embeddings | None): The embeddings used to create the index of the processed files.
-
         Returns:
             Brain: The brain created from the langchain documents.
-
         Example:
         ```python
         from langchain_core.documents import Document
@@ -449,6 +424,7 @@ async def afrom_langchain_documents(
         brain.print_info()
         ```
         """
+
         if llm is None:
             llm = default_llm()

@@ -481,16 +457,13 @@ async def asearch(
     ) -> list[SearchResult]:
         """
         Search for relevant documents in the brain based on a query.
-
         Args:
             query (str | Document): The query to search for.
             n_results (int): The number of results to return.
             filter (Callable | Dict[str, Any] | None): The filter to apply to the search.
             fetch_n_neighbors (int): The number of neighbors to fetch.
-
         Returns:
             list[SearchResult]: The list of retrieved chunks.
-
         Example:
         ```python
         brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
@@ -517,57 +490,6 @@ def add_file(self) -> None:
         # add it to vectorstore
         raise NotImplementedError

-    def ask(
-        self,
-        question: str,
-        retrieval_config: RetrievalConfig | None = None,
-        rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
-        list_files: list[QuivrKnowledge] | None = None,
-        chat_history: ChatHistory | None = None,
-    ) -> ParsedRAGResponse:
-        """
-        Ask a question to the brain and get a generated answer.
-
-        Args:
-            question (str): The question to ask.
-            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
-            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
-            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
-            chat_history (ChatHistory | None): The chat history to use.
-
-        Returns:
-            ParsedRAGResponse: The generated answer.
-
-        Example:
-        ```python
-        brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
-        answer = brain.ask("What is the meaning of life?")
-        print(answer.answer)
-        ```
-        """
-        async def collect_streamed_response():
-            full_answer = ""
-            async for response in self.ask_streaming(
-                question=question,
-                retrieval_config=retrieval_config,
-                rag_pipeline=rag_pipeline,
-                list_files=list_files,
-                chat_history=chat_history
-            ):
-                full_answer += response.answer
-            return full_answer
-
-        # Run the async function in the event loop
-        loop = asyncio.get_event_loop()
-        full_answer = loop.run_until_complete(collect_streamed_response())
-
-        chat_history = self.default_chat if chat_history is None else chat_history
-        chat_history.append(HumanMessage(content=question))
-        chat_history.append(AIMessage(content=full_answer))
-
-        # Return the final response
-        return ParsedRAGResponse(answer=full_answer)
-
     async def ask_streaming(
         self,
         question: str,
@@ -578,24 +500,20 @@ async def ask_streaming(
     ) -> AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]:
         """
         Ask a question to the brain and get a streamed generated answer.
-
         Args:
             question (str): The question to ask.
             retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
             rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
-            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
             chat_history (ChatHistory | None): The chat history to use.
-
         Returns:
             AsyncGenerator[ParsedRAGChunkResponse, ParsedRAGChunkResponse]: The streamed generated answer.
-
         Example:
         ```python
         brain = Brain.from_files(name="My Brain", file_paths=["file1.pdf", "file2.pdf"])
         async for chunk in brain.ask_streaming("What is the meaning of life?"):
             print(chunk.answer)
         ```
-
         """
         llm = self.llm

@@ -630,3 +548,64 @@ async def ask_streaming(
         chat_history.append(AIMessage(content=full_answer))
         yield response

+    async def aask(
+        self,
+        question: str,
+        retrieval_config: RetrievalConfig | None = None,
+        rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
+        list_files: list[QuivrKnowledge] | None = None,
+        chat_history: ChatHistory | None = None,
+    ) -> ParsedRAGResponse:
+        """
+        Asynchronous version that asks a question to the brain and gets a generated answer.
+        Args:
+            question (str): The question to ask.
+            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            chat_history (ChatHistory | None): The chat history to use.
+        Returns:
+            ParsedRAGResponse: The generated answer.
+        """
+        full_answer = ""
+
+        async for response in self.ask_streaming(
+            question=question,
+            retrieval_config=retrieval_config,
+            rag_pipeline=rag_pipeline,
+            list_files=list_files,
+            chat_history=chat_history,
+        ):
+            full_answer += response.answer
+
+        return ParsedRAGResponse(answer=full_answer)
+
+    def ask(
+        self,
+        question: str,
+        retrieval_config: RetrievalConfig | None = None,
+        rag_pipeline: Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None = None,
+        list_files: list[QuivrKnowledge] | None = None,
+        chat_history: ChatHistory | None = None,
+    ) -> ParsedRAGResponse:
+        """
+        Fully synchronous version that asks a question to the brain and gets a generated answer.
+        Args:
+            question (str): The question to ask.
+            retrieval_config (RetrievalConfig | None): The retrieval configuration (see RetrievalConfig docs).
+            rag_pipeline (Type[Union[QuivrQARAG, QuivrQARAGLangGraph]] | None): The RAG pipeline to use.
+            list_files (list[QuivrKnowledge] | None): The list of files to include in the RAG pipeline.
+            chat_history (ChatHistory | None): The chat history to use.
+        Returns:
+            ParsedRAGResponse: The generated answer.
+        """
+        loop = asyncio.get_event_loop()
+        return loop.run_until_complete(
+            self.aask(
+                question=question,
+                retrieval_config=retrieval_config,
+                rag_pipeline=rag_pipeline,
+                list_files=list_files,
+                chat_history=chat_history,
+            )
+        )
````

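For reference, a short usage sketch of the reworked question-answering API after this change (the file path and question are placeholders; the calls mirror the docstrings in the diff above, and the top-level `Brain` export is assumed):

```python
import asyncio

from quivr_core import Brain  # top-level export assumed

async def main():
    brain = await Brain.afrom_files(name="My Brain", file_paths=["file1.pdf"])
    # aask() accumulates the chunks yielded by ask_streaming() into one answer.
    response = await brain.aask("What is this document about?")
    print(response.answer)

asyncio.run(main())
```

Note the split: `aask` is the native async entry point, while the rewritten `ask` drives it with `loop.run_until_complete`, so `ask` should only be called from synchronous code where no event loop is already running.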
core/quivr_core/brain/brain_defaults.py

+4 -2

```diff
@@ -4,7 +4,7 @@
 from langchain_core.embeddings import Embeddings
 from langchain_core.vectorstores import VectorStore

-from quivr_core.config import LLMEndpointConfig
+from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig
 from quivr_core.llm import LLMEndpoint

 logger = logging.getLogger("quivr_core")
@@ -46,7 +46,9 @@ def default_embedder() -> Embeddings:
 def default_llm() -> LLMEndpoint:
     try:
         logger.debug("Loaded ChatOpenAI as default LLM for brain")
-        llm = LLMEndpoint.from_config(LLMEndpointConfig())
+        llm = LLMEndpoint.from_config(
+            LLMEndpointConfig(supplier=DefaultModelSuppliers.OPENAI, model="gpt-4o")
+        )
         return llm

     except ImportError as e:
```
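The default LLM is now built from an explicit supplier/model pair instead of an implicit `LLMEndpointConfig()`. A minimal sketch of constructing an endpoint the same way, using only the names visible in this diff (any additional `LLMEndpointConfig` fields are assumptions):

```python
from quivr_core.llm import LLMEndpoint
from quivr_core.rag.entities.config import DefaultModelSuppliers, LLMEndpointConfig

# Mirrors the new default_llm() body; an OpenAI API key is assumed to be
# available in the environment for the endpoint to be usable.
llm = LLMEndpoint.from_config(
    LLMEndpointConfig(supplier=DefaultModelSuppliers.OPENAI, model="gpt-4o")
)
```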
