Skip to content

Commit

Permalink
feat: add param extra_payload for VectorRetriever.process() (#1161)
Browse files Browse the repository at this point in the history
Co-authored-by: Wendong <[email protected]>
  • Loading branch information
koch3092 and Wendong-Fan authored Nov 8, 2024
1 parent 768dc3d commit 7468dde
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
6 changes: 5 additions & 1 deletion camel/retrievers/vector_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def process(
max_characters: int = 500,
embed_batch: int = 50,
should_chunk: bool = True,
extra_info: Optional[dict] = None,
**kwargs: Any,
) -> None:
r"""Processes content from local file path, remote URL, string
Expand All @@ -93,6 +94,8 @@ def process(
embed_batch (int): Size of batch for embeddings. Defaults to `50`.
should_chunk (bool): If True, divide the content into chunks,
otherwise skip chunking. Defaults to True.
extra_info (Optional[dict]): Extra information to be added
to the payload. Defaults to None.
**kwargs (Any): Additional keyword arguments for content parsing.
"""
from unstructured.documents.elements import Element
Expand Down Expand Up @@ -153,12 +156,13 @@ def process(
chunk_metadata = {"metadata": chunk.metadata.to_dict()}
# Remove the 'orig_elements' key if it exists
chunk_metadata["metadata"].pop("orig_elements", "")

extra_info = extra_info or {}
chunk_text = {"text": str(chunk)}
combined_dict = {
**content_path_info,
**chunk_metadata,
**chunk_text,
**extra_info,
}

records.append(
Expand Down
2 changes: 1 addition & 1 deletion test/retrievers/test_cohere_rerank_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_query(cohere_rerank, mock_retrieved_result):
query=query, retrieved_result=mock_retrieved_result, top_k=1
)
assert len(result) == 1
- assert result[0]["similarity score"] == 0.9999998
+ assert result[0]["similarity score"] == 0.9999999
assert (
'by Isaac Asimov in his science fiction stories' in result[0]["text"]
)

0 comments on commit 7468dde

Please sign in to comment.