microsoft · thinkall · Jun 11, 2024 · Jun 4, 2024 · Jun 4, 2024 · Jun 4, 2024
diff --git a/autogen/agentchat/contrib/vectordb/pgvectordb.py b/autogen/agentchat/contrib/vectordb/pgvectordb.py
@@ -80,6 +80,14 @@ def __init__(
         self.metadata = metadata if metadata else {"hnsw:space": "ip", "hnsw:construction_ef": 32, "hnsw:M": 16}
         self.documents = ""
         self.get_or_create = get_or_create
+        # Infer the embedding dimension by encoding a few sample sentences and reading the width of the result.
+        sentences = [
+            "The weather is lovely today in paradise.",
+            "It's so sunny outside in the garden!",
+            "He reached his max potential within his lifetime.",
+        ]
+        embeddings = self.embedding_function.encode(sentences)
+        self.dimension = embeddings.shape[1]
 
     def set_collection_name(self, collection_name) -> str:
         name = re.sub("-", "_", collection_name)
@@ -304,7 +312,7 @@ def get(
                 )
         except (psycopg.errors.UndefinedTable, psycopg.errors.UndefinedColumn) as e:
             logger.info(f"Error executing select on non-existent table: {self.name}. Creating it instead. Error: {e}")
-            self.create_collection(collection_name=self.name)
+            self.create_collection(collection_name=self.name, dimension=self.dimension)
             logger.info(f"Created table {self.name}")
 
         cursor.close()
@@ -526,22 +534,29 @@ def delete_collection(self, collection_name: Optional[str] = None) -> None:
         cursor.execute(f"DROP TABLE IF EXISTS {self.name}")
         cursor.close()
 
-    def create_collection(self, collection_name: Optional[str] = None) -> None:
+    def create_collection(
+        self, collection_name: Optional[str] = None, dimension: Optional[Union[str, int]] = 384
+    ) -> None:
         """
         Create a new collection.
 
         Args:
             collection_name (Optional[str]): The name of the new collection.
+            dimension (Optional[Union[str, int]]): The embedding dimension used for the `embedding` vector column. Defaults to 384.
 
         Returns:
             None
         """
         if collection_name:
             self.name = collection_name
+
+        if dimension:
+            self.dimension = dimension
+
         cursor = self.client.cursor()
         cursor.execute(
             f"CREATE TABLE {self.name} ("
-            f"documents text, id CHAR(8) PRIMARY KEY, metadatas JSONB, embedding vector(384));"
+            f"documents text, id CHAR(8) PRIMARY KEY, metadatas JSONB, embedding vector({self.dimension}));"
             f"CREATE INDEX "
             f'ON {self.name} USING hnsw (embedding vector_l2_ops) WITH (m = {self.metadata["hnsw:M"]}, '
             f'ef_construction = {self.metadata["hnsw:construction_ef"]});'
@@ -602,6 +617,37 @@ def __init__(
 
         Returns:
             None
+
+        Example:
+            RetrieveUserProxyAgent(
+                name="ragproxyagent",
+                human_input_mode="NEVER",
+                max_consecutive_auto_reply=3,
+                retrieve_config={
+                    "task": "code",
+                    "docs_path": [
+                        "https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md",
+                        "https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md",
+                        "https://raw.githubusercontent.com/Knuckles-Team/geniusbot/main/README.md",
+                        "https://raw.githubusercontent.com/Knuckles-Team/repository-manager/main/README.md",
+                        "https://raw.githubusercontent.com/Knuckles-Team/gitlab-api/main/README.md",
+                        "https://raw.githubusercontent.com/Knuckles-Team/media-downloader/main/README.md",
+                        os.path.join(os.path.abspath(""), "..", "website", "docs"),
+                    ],
+                    "custom_text_types": ["non-existent-type"],
+                    "chunk_token_size": 2000,
+                    "model": config_list[0]["model"],
+                    "vector_db": "pgvector",  # PGVector database
+                    "collection_name": "test_collection",
+                    "db_config": {
+                        "connection_string": "postgresql://postgres:postgres@localhost:5432/postgres",
+                    },
+                    "embedding_function": "all-distilroberta-v1",
+                    "get_or_create": True,  # set to False if you don't want to reuse an existing collection
+                    "overwrite": False,  # set to True if you want to overwrite an existing collection
+                },
+                code_execution_config=False,  # set to False if you don't want to execute the code
+            )
         """
         self.client = self.establish_connection(
             conn=conn,
@@ -618,6 +664,14 @@ def __init__(
             self.embedding_function = (
                 SentenceTransformer(self.model_name) if embedding_function is None else embedding_function
             )
+            # Infer the embedding dimension by encoding a few sample sentences and reading the width of the result.
+            sentences = [
+                "The weather is lovely today in paradise.",
+                "It's so sunny outside in the garden!",
+                "He reached his max potential within his lifetime.",
+            ]
+            embeddings = self.embedding_function.encode(sentences)
+            self.dimension = embeddings.shape[1]
         except Exception as e:
             logger.error(
                 f"Validate the model name entered: {self.model_name} "