Skip to content

Commit ff41489

Browse files
authored
Fix docstring of get_or_create (#583)
* Fix docstring of get_or_create
* Improve docstring
1 parent 805d845 commit ff41489

File tree

3 files changed

+8
-10
lines changed

3 files changed

+8
-10
lines changed

autogen/agentchat/contrib/retrieve_user_proxy_agent.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ def __init__(
122122
- customized_answer_prefix (Optional, str): the customized answer prefix for the retrieve chat. Default is "".
123123
If not "" and the customized_answer_prefix is not in the answer, `Update Context` will be triggered.
124124
- update_context (Optional, bool): if False, will not apply `Update Context` for interactive retrieval. Default is True.
125-
- get_or_create (Optional, bool): if True, will create/recreate a collection for the retrieve chat.
126-
This is the same as that used in chromadb. Default is False. Will be set to False if docs_path is None.
125+
- get_or_create (Optional, bool): if True, will create/return a collection for the retrieve chat. This is the same as that used in chromadb.
126+
Default is False. Will raise ValueError if the collection already exists and get_or_create is False. Will be set to True if docs_path is None.
127127
- custom_token_count_function(Optional, Callable): a custom function to count the number of tokens in a string.
128128
The function should take (text:str, model:str) as input and return the token_count(int). the retrieve_config["model"] will be passed in the function.
129129
Default is autogen.token_count_utils.count_token that uses tiktoken, which may not be accurate for non-OpenAI models.
@@ -178,9 +178,7 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
178178
self.customized_prompt = self._retrieve_config.get("customized_prompt", None)
179179
self.customized_answer_prefix = self._retrieve_config.get("customized_answer_prefix", "").upper()
180180
self.update_context = self._retrieve_config.get("update_context", True)
181-
self._get_or_create = (
182-
self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else False
183-
)
181+
self._get_or_create = self._retrieve_config.get("get_or_create", False) if self._docs_path is not None else True
184182
self.custom_token_count_function = self._retrieve_config.get("custom_token_count_function", count_token)
185183
self.custom_text_split_function = self._retrieve_config.get("custom_text_split_function", None)
186184
self._context_max_tokens = self._max_tokens * 0.8
@@ -360,7 +358,7 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
360358
n_results (int): the number of results to be retrieved.
361359
search_string (str): only docs containing this string will be retrieved.
362360
"""
363-
if not self._collection or self._get_or_create:
361+
if not self._collection or not self._get_or_create:
364362
print("Trying to create collection.")
365363
self._client = create_vector_db_from_dir(
366364
dir_path=self._docs_path,
@@ -375,7 +373,7 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
375373
custom_text_split_function=self.custom_text_split_function,
376374
)
377375
self._collection = True
378-
self._get_or_create = False
376+
self._get_or_create = True
379377

380378
results = query_vector_db(
381379
query_texts=[problem],

autogen/retrieve_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def create_vector_db_from_dir(
242242
db_path (Optional, str): the path to the chromadb. Default is "/tmp/chromadb.db".
243243
collection_name (Optional, str): the name of the collection. Default is "all-my-documents".
244244
get_or_create (Optional, bool): Whether to get or create the collection. Default is False. If True, the collection
245-
will be recreated if it already exists.
245+
will be returned if it already exists. Will raise ValueError if the collection already exists and get_or_create is False.
246246
chunk_mode (Optional, str): the chunk mode. Default is "multi_lines".
247247
must_break_at_empty_line (Optional, bool): Whether to break at empty line. Default is True.
248248
embedding_model (Optional, str): the embedding model to use. Default is "all-MiniLM-L6-v2". Will be ignored if

notebook/agentchat_RetrieveChat.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@
212212
" \"model\": config_list[0][\"model\"],\n",
213213
" \"client\": chromadb.PersistentClient(path=\"/tmp/chromadb\"),\n",
214214
" \"embedding_model\": \"all-mpnet-base-v2\",\n",
215-
" \"get_or_create\": False, # set to True if you want to recreate the collection\n",
215+
" \"get_or_create\": True, # set to False if you don't want to reuse an existing collection, but you'll need to remove the collection manually\n",
216216
" },\n",
217217
")"
218218
]
@@ -4172,7 +4172,7 @@
41724172
"name": "python",
41734173
"nbconvert_exporter": "python",
41744174
"pygments_lexer": "ipython3",
4175-
"version": "3.10.12"
4175+
"version": "3.10.13"
41764176
}
41774177
},
41784178
"nbformat": 4,

0 commit comments

Comments (0)