Skip to content

Commit 59e515d

Browse files
committed
docs: notebook for QdrantRetrieveUserProxyAgent
1 parent c579230 commit 59e515d

File tree

2 files changed

+3323
-8
lines changed

2 files changed

+3323
-8
lines changed

autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def __init__(
4444
To use default config, set to None. Otherwise, set to a dictionary with the following keys:
4545
- task (Optional, str): the task of the retrieve chat. Possible values are "code", "qa" and "default". System
4646
prompt will be different for different tasks. The default value is `default`, which supports both code and qa.
47-
- client (Optional, qdrant_client.QdrantClient(":memory:")): A QdrantClient instance. If not provided, an in-memory instance will be assigned.`
47+
- client (Optional, qdrant_client.QdrantClient(":memory:")): A QdrantClient instance. If not provided, an in-memory instance will be assigned. Not recommended for production.
4848
If you want to use another vector db, extend this class and override the `retrieve_docs` function.
4949
- docs_path (Optional, str): the path to the docs directory. It can also be the path to a single file,
5050
or the url to a single file. Default is None, which works only if the collection is already created.
@@ -61,7 +61,7 @@ def __init__(
6161
- must_break_at_empty_line (Optional, bool): chunk will only break at empty line if True. Default is True.
6262
If chunk_mode is "one_line", this parameter will be ignored.
6363
- embedding_model (Optional, str): the embedding model to use for the retrieve chat.
64-
If key not provided, a default model `BAAI/bge-base-en-v1.5` will be used. All available models
64+
If key not provided, a default model `BAAI/bge-small-en-v1.5` will be used. All available models
6565
can be found at `https://qdrant.github.io/fastembed/examples/Supported_Models/`.
6666
- customized_prompt (Optional, str): the customized prompt for the retrieve chat. Default is None.
6767
- customized_answer_prefix (Optional, str): the customized answer prefix for the retrieve chat. Default is "".
@@ -82,7 +82,7 @@ def __init__(
8282
"""
8383
super().__init__(name, human_input_mode, is_termination_msg, retrieve_config, **kwargs)
8484
self._client = self._retrieve_config.get("client", QdrantClient(":memory:"))
85-
self._embedding_model = self._retrieve_config.get("embedding_model", "BAAI/bge-base-en-v1.5")
85+
self._embedding_model = self._retrieve_config.get("embedding_model", "BAAI/bge-small-en-v1.5")
8686
# Uses all available CPU cores to encode data when set to 0
8787
self._parallel = self._retrieve_config.get("parallel", 0)
8888
self._on_disk = self._retrieve_config.get("on_disk", False)
@@ -135,7 +135,7 @@ def create_qdrant_from_dir(
135135
collection_name: str = "all-my-documents",
136136
chunk_mode: str = "multi_lines",
137137
must_break_at_empty_line: bool = True,
138-
embedding_model: str = "BAAI/bge-base-en-v1.5",
138+
embedding_model: str = "BAAI/bge-small-en-v1.5",
139139
custom_text_split_function: Callable = None,
140140
parallel: int = 0,
141141
on_disk: bool = False,
@@ -154,11 +154,11 @@ def create_qdrant_from_dir(
154154
collection_name (Optional, str): the name of the collection. Default is "all-my-documents".
155155
chunk_mode (Optional, str): the chunk mode. Default is "multi_lines".
156156
must_break_at_empty_line (Optional, bool): Whether to break at empty line. Default is True.
157-
embedding_model (Optional, str): the embedding model to use. Default is "BAAI/bge-base-en-v1.5". The list of all the available models can be at https://qdrant.github.io/fastembed/examples/Supported_Models/.
157+
embedding_model (Optional, str): the embedding model to use. Default is "BAAI/bge-small-en-v1.5". The list of all the available models can be found at https://qdrant.github.io/fastembed/examples/Supported_Models/.
158158
parallel (Optional, int): How many parallel workers to use for embedding. Default is 0, which uses all available CPU cores.
159159
on_disk (Optional, bool): Whether to store the collection on disk. Default is False.
160-
quantization_config: Quantization configuration. If None, quantization will be disabled.
161-
hnsw_config: HNSW configuration. If None, default configuration will be used.
160+
quantization_config: Quantization configuration. If None, quantization will be disabled. Ref: https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection
161+
hnsw_config: HNSW configuration. If None, default configuration will be used. Ref: https://qdrant.github.io/qdrant/redoc/index.html#tag/collections/operation/create_collection
162162
payload_indexing: Whether to create a payload index for the document field. Default is False.
163163
qdrant_client_options: (Optional, dict): the options for instantiating the qdrant client. Reference: https://github.com/qdrant/qdrant-client/blob/master/qdrant_client/qdrant_client.py#L36-L58.
164164
"""
@@ -214,7 +214,7 @@ def query_qdrant(
214214
client: QdrantClient = None,
215215
collection_name: str = "all-my-documents",
216216
search_string: str = "",
217-
embedding_model: str = "BAAI/bge-base-en-v1.5",
217+
embedding_model: str = "BAAI/bge-small-en-v1.5",
218218
qdrant_client_options: Optional[Dict] = {},
219219
) -> List[List[QueryResponse]]:
220220
"""Perform a similarity search with filters on a Qdrant collection

0 commit comments

Comments
 (0)