From 6819799ebe435e8e512bf07f1cd1d52f58a1b2f9 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Thu, 6 Apr 2023 22:25:17 -0500 Subject: [PATCH 01/15] Create an abstract MemoryProviderSingleton class. Pass config instead of instantiating a new one where used. --- scripts/commands.py | 4 +-- scripts/config.py | 7 ++++- scripts/main.py | 4 +-- scripts/memory/__init__.py | 0 scripts/memory/base.py | 34 +++++++++++++++++++++++ scripts/{memory.py => memory/pinecone.py} | 18 +++--------- scripts/memory/redismem.py | 0 7 files changed, 48 insertions(+), 19 deletions(-) create mode 100644 scripts/memory/__init__.py create mode 100644 scripts/memory/base.py rename scripts/{memory.py => memory/pinecone.py} (80%) create mode 100644 scripts/memory/redismem.py diff --git a/scripts/commands.py b/scripts/commands.py index fc10d1d052e3..f00875f06354 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -1,6 +1,6 @@ import browse import json -from memory import PineconeMemory +from memory.pinecone import PineconeMemory import datetime import agent_manager as agents import speak @@ -52,7 +52,7 @@ def get_command(response): def execute_command(command_name, arguments): - memory = PineconeMemory() + memory = PineconeMemory(cfg=cfg) try: if command_name == "google": diff --git a/scripts/config.py b/scripts/config.py index fe48d2980040..1b716a3ebadd 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -1,3 +1,4 @@ +import abc import os import openai from dotenv import load_dotenv @@ -5,7 +6,7 @@ load_dotenv() -class Singleton(type): +class Singleton(abc.ABCMeta, type): """ Singleton metaclass for ensuring only one instance of a class. """ @@ -20,6 +21,10 @@ def __call__(cls, *args, **kwargs): return cls._instances[cls] +class AbstractSingleton(abc.ABC, metaclass=Singleton): + pass + + class Config(metaclass=Singleton): """ Configuration class to store the state of bools for different scripts access. diff --git a/scripts/main.py b/scripts/main.py index a79fd553cef7..acb63a39be84 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -1,7 +1,7 @@ import json import random import commands as cmd -from memory import PineconeMemory +from memory.pinecone import PineconeMemory import data import chat from colorama import Fore, Style @@ -283,7 +283,7 @@ def parse_arguments(): # Initialize memory and make sure it is empty. 
# this is particularly important for indexing and referencing pinecone memory -memory = PineconeMemory() +memory = PineconeMemory(cfg) memory.clear() print('Using memory of type: ' + memory.__class__.__name__) diff --git a/scripts/memory/__init__.py b/scripts/memory/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/scripts/memory/base.py b/scripts/memory/base.py new file mode 100644 index 000000000000..29f5d56be1d9 --- /dev/null +++ b/scripts/memory/base.py @@ -0,0 +1,34 @@ +import abc +from config import AbstractSingleton +import openai + + +def get_ada_embedding(text): + text = text.replace("\n", " ") + return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"] + + +def get_text_from_embedding(embedding): + return openai.Embedding.retrieve(embedding, model="text-embedding-ada-002")["data"][0]["text"] + + +class MemoryProviderSingleton(AbstractSingleton): + @abc.abstractmethod + def add(self, data): + pass + + @abc.abstractmethod + def get(self, data): + pass + + @abc.abstractmethod + def clear(self): + pass + + @abc.abstractmethod + def get_relevant(self, data, num_relevant=5): + pass + + @abc.abstractmethod + def get_stats(self): + pass diff --git a/scripts/memory.py b/scripts/memory/pinecone.py similarity index 80% rename from scripts/memory.py rename to scripts/memory/pinecone.py index 0d265a31d8f4..8e1eaa570fee 100644 --- a/scripts/memory.py +++ b/scripts/memory/pinecone.py @@ -1,21 +1,11 @@ -from config import Config, Singleton -import pinecone -import openai - -cfg = Config() - - -def get_ada_embedding(text): - text = text.replace("\n", " ") - return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"] +import pinecone -def get_text_from_embedding(embedding): - return openai.Embedding.retrieve(embedding, model="text-embedding-ada-002")["data"][0]["text"] +from memory.base import MemoryProviderSingleton, get_ada_embedding -class PineconeMemory(metaclass=Singleton): - def __init__(self): +class PineconeMemory(MemoryProviderSingleton): + def __init__(self, cfg): pinecone_api_key = cfg.pinecone_api_key pinecone_region = cfg.pinecone_region pinecone.init(api_key=pinecone_api_key, environment=pinecone_region) diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py new file mode 100644 index 000000000000..e69de29bb2d1 From 5a1d9e6d0a1752cf08cf747f9279f8b316f3a8c4 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 00:08:25 -0500 Subject: [PATCH 02/15] Implement redis memory backend. --- README.md | 21 ++++++ requirements.txt | 1 + scripts/commands.py | 6 +- scripts/config.py | 7 +- scripts/main.py | 8 ++- scripts/memory/base.py | 1 + scripts/memory/redismem.py | 135 +++++++++++++++++++++++++++++++++++++ 7 files changed, 175 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a89c5d03b7e2..921f297eed85 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,27 @@ are loaded for the agent at any given time. 2. Choose the `Starter` plan to avoid being charged. 3. Find your API key and region under the default project in the left sidebar. + +## Redis Setup + +Install docker desktop. 
+ +Run: +``` +docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest +``` + +Set the following environment variables: +``` +MEMORY_BACKEND=redis +REDIS_HOST=localhost +REDIS_PORT=6379 +REDIS_PASSWORD= +``` + +Note that this is not intended to be run facing the internet and is not secure, do not expose redis to the internet without a password or at all really. + + ### Setting up environment variables For Windows Users: ``` diff --git a/requirements.txt b/requirements.txt index ce24709858e9..9cfddad62778 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ docker duckduckgo-search google-api-python-client #(https://developers.google.com/custom-search/v1/overview) pinecone-client==2.2.1 +redis \ No newline at end of file diff --git a/scripts/commands.py b/scripts/commands.py index f00875f06354..98be77727bc9 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -1,6 +1,7 @@ import browse import json from memory.pinecone import PineconeMemory +from memory.redismem import RedisMemory import datetime import agent_manager as agents import speak @@ -52,7 +53,10 @@ def get_command(response): def execute_command(command_name, arguments): - memory = PineconeMemory(cfg=cfg) + if cfg.memory_backend == "pinecone": + memory = PineconeMemory(cfg=cfg) + else: + memory = RedisMemory(cfg=cfg) try: if command_name == "google": diff --git a/scripts/config.py b/scripts/config.py index 1b716a3ebadd..77498d6c912b 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -61,7 +61,12 @@ def __init__(self): # User agent headers to use when browsing web # Some websites might just completely deny request with an error code if no user agent was found. self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} - + self.redis_host = os.getenv("REDIS_HOST") + self.redis_port = os.getenv("REDIS_PORT") + self.redis_password = os.getenv("REDIS_PASSWORD") + # Note that indexes must be created on db 0 in redis, this is not configureable. + + self.memory_backend = os.getenv("MEMORY_BACKEND", 'pinecone') # Initialize the OpenAI API client openai.api_key = self.openai_api_key diff --git a/scripts/main.py b/scripts/main.py index acb63a39be84..eecdd7f80ee3 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -2,6 +2,7 @@ import random import commands as cmd from memory.pinecone import PineconeMemory +from memory.redismem import RedisMemory import data import chat from colorama import Fore, Style @@ -283,8 +284,11 @@ def parse_arguments(): # Initialize memory and make sure it is empty. 
# this is particularly important for indexing and referencing pinecone memory -memory = PineconeMemory(cfg) -memory.clear() +if cfg.memory_backend == "pinecone": + memory = PineconeMemory(cfg) + memory.clear() +else: + memory = RedisMemory(cfg) print('Using memory of type: ' + memory.__class__.__name__) diff --git a/scripts/memory/base.py b/scripts/memory/base.py index 29f5d56be1d9..72349f6be124 100644 --- a/scripts/memory/base.py +++ b/scripts/memory/base.py @@ -1,3 +1,4 @@ +"""Base class for memory providers.""" import abc from config import AbstractSingleton import openai diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py index e69de29bb2d1..162b9269bd12 100644 --- a/scripts/memory/redismem.py +++ b/scripts/memory/redismem.py @@ -0,0 +1,135 @@ +"""Redis memory provider.""" +from typing import Any, List, Optional +import redis +from redis.commands.search.field import VectorField, TextField +from redis.commands.search.query import Query +from redis.commands.search.indexDefinition import IndexDefinition, IndexType +import traceback +import numpy as np + +from memory.base import MemoryProviderSingleton, get_ada_embedding + + +SCHEMA = [ + TextField("data"), + VectorField( + "embedding", + "HNSW", + { + "TYPE": "FLOAT32", + "DIM": 1536, + "DISTANCE_METRIC": "COSINE" + } + ), +] + + +class RedisMemory(MemoryProviderSingleton): + def __init__(self, cfg): + """ + Initializes the Redis memory provider. + + Args: + cfg: The config object. + + Returns: None + """ + redis_host = cfg.redis_host + redis_port = cfg.redis_port + redis_password = cfg.redis_password + self.dimension = 1536 + self.redis = redis.Redis( + host=redis_host, + port=redis_port, + password=redis_password, + db=0 # Cannot be changed + ) + self.redis.flushall() + try: + self.redis.ft("gpt").create_index( + fields=SCHEMA, + definition=IndexDefinition( + prefix=["gpt:"], + index_type=IndexType.HASH + ) + ) + except Exception as e: + print("Error creating Redis search index: ", e) + self.vec_num = 0 + + def add(self, data: str) -> str: + """ + Adds a data point to the memory. + + Args: + data: The data to add. + + Returns: Message indicating that the data has been added. + """ + vector = get_ada_embedding(data) + vector = np.array(vector).astype(np.float32).tobytes() + data_dict = { + b"data": data, + "embedding": vector + } + self.redis.hset(f"gpt:{self.vec_num}", mapping=data_dict) + _text = f"Inserting data into memory at index: {self.vec_num}:\n"\ + f"data: {data}" + self.vec_num += 1 + return _text + + def get(self, data: str) -> Optional[List[Any]]: + """ + Gets the data from the memory that is most relevant to the given data. + + Args: + data: The data to compare to. + + Returns: The most relevant data. + """ + return self.get_relevant(data, 1) + + def clear(self) -> str: + """ + Clears the redis server. + + Returns: A message indicating that the memory has been cleared. + """ + self.redis.flushall() + return "Obliviated" + + def get_relevant( + self, + data: str, + num_relevant: int = 5 + ) -> Optional[List[Any]]: + """ + Returns all the data in the memory that is relevant to the given data. + Args: + data: The data to compare to. + num_relevant: The number of relevant data to return. + + Returns: A list of the most relevant data. 
+ """ + query_embedding = get_ada_embedding(data) + base_query = f"*=>[KNN {num_relevant} @embedding $vector AS vector_score]" + query = Query(base_query).return_fields( + "data", + "vector_score" + ).sort_by("vector_score").dialect(2) + query_vector = np.array(query_embedding).astype(np.float32).tobytes() + + try: + results = self.redis.ft("gpt").search( + query, query_params={"vector": query_vector} + ) + except Exception as e: + print("Error calling Redis search: ", e) + return None + return list(results.docs) + + def get_stats(self): + """ + Returns: The stats of the memory index. + """ + return self.redis.ft("mem").info() From cce79695fa43a9abb5aca8e368f7951924c3ae9c Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 00:48:27 -0500 Subject: [PATCH 03/15] Save redis memory state, with the default being to wipe on start still. --- scripts/config.py | 3 ++- scripts/memory/redismem.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/config.py b/scripts/config.py index 77498d6c912b..8c582a157242 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -64,8 +64,9 @@ def __init__(self): self.redis_host = os.getenv("REDIS_HOST") self.redis_port = os.getenv("REDIS_PORT") self.redis_password = os.getenv("REDIS_PASSWORD") + self.wipe_redis_on_start = os.getenv("WIPE_REDIS_ON_START", "True") == 'True' # Note that indexes must be created on db 0 in redis, this is not configureable. - + self.memory_backend = os.getenv("MEMORY_BACKEND", 'pinecone') # Initialize the OpenAI API client openai.api_key = self.openai_api_key diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py index 162b9269bd12..e7021066fa1a 100644 --- a/scripts/memory/redismem.py +++ b/scripts/memory/redismem.py @@ -44,7 +44,8 @@ def __init__(self, cfg): password=redis_password, db=0 # Cannot be changed ) - self.redis.flushall() + if cfg.wipe_redis_on_start: + self.redis.flushall() try: self.redis.ft("gpt").create_index( fields=SCHEMA, @@ -55,7 +56,9 @@ def __init__(self, cfg): ) except Exception as e: print("Error creating Redis search index: ", e) - self.vec_num = 0 + existing_vec_num = self.redis.get('vec_num') + self.vec_num = int(existing_vec_num.decode('utf-8')) if\ + existing_vec_num else 0 def add(self, data: str) -> str: """ @@ -72,10 +75,13 @@ def add(self, data: str) -> str: b"data": data, "embedding": vector } - self.redis.hset(f"gpt:{self.vec_num}", mapping=data_dict) + pipe = self.redis.pipeline() + pipe.hset(f"gpt:{self.vec_num}", mapping=data_dict) _text = f"Inserting data into memory at index: {self.vec_num}:\n"\ f"data: {data}" self.vec_num += 1 + pipe.set('vec_num', self.vec_num) + pipe.execute() return _text def get(self, data: str) -> Optional[List[Any]]: From 43746b1396fe47feae9447a72bbaa15ce2c0960a Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 00:58:57 -0500 Subject: [PATCH 04/15] Update README with WIPE_REDIS_ON_START setting. --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 921f297eed85..7d83b4633ff5 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,13 @@ REDIS_PASSWORD= Note that this is not intended to be run facing the internet and is not secure, do not expose redis to the internet without a password or at all really. +You can optionally set + +``` +WIPE_REDIS_ON_START=False +``` + +To persist memory stored in Redis. 
### Setting up environment variables For Windows Users: From f0162037c341e31583d09626da2c853563cc4776 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:02:22 -0500 Subject: [PATCH 05/15] Fix README --- README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7d83b4633ff5..b7f514e8306e 100644 --- a/README.md +++ b/README.md @@ -140,16 +140,6 @@ export CUSTOM_SEARCH_ENGINE_ID="YOUR_CUSTOM_SEARCH_ENGINE_ID" ``` -## 🌲 Pinecone API Key Setup - -Pinecone enable a vector based memory so a vast memory can be stored and only relevant memories -are loaded for the agent at any given time. - -1. Go to app.pinecone.io and make an account if you don't already have one. -2. Choose the `Starter` plan to avoid being charged. -3. Find your API key and region under the default project in the left sidebar. - - ## Redis Setup Install docker desktop. @@ -177,6 +167,15 @@ WIPE_REDIS_ON_START=False To persist memory stored in Redis. +## 🌲 Pinecone API Key Setup + +Pinecone enable a vector based memory so a vast memory can be stored and only relevant memories +are loaded for the agent at any given time. + +1. Go to app.pinecone.io and make an account if you don't already have one. +2. Choose the `Starter` plan to avoid being charged. +3. Find your API key and region under the default project in the left sidebar. + ### Setting up environment variables For Windows Users: ``` From 5d13fb2546916f2b5ff360720b07706ab31e6e21 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:03:20 -0500 Subject: [PATCH 06/15] Remove unused function. --- scripts/memory/base.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/memory/base.py b/scripts/memory/base.py index 72349f6be124..d7ab7fcf1f55 100644 --- a/scripts/memory/base.py +++ b/scripts/memory/base.py @@ -9,10 +9,6 @@ def get_ada_embedding(text): return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"] -def get_text_from_embedding(embedding): - return openai.Embedding.retrieve(embedding, model="text-embedding-ada-002")["data"][0]["text"] - - class MemoryProviderSingleton(AbstractSingleton): @abc.abstractmethod def add(self, data): From 14e10c9c4ddc1d0736b4161e96d0c2517c65b12a Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:27:48 -0500 Subject: [PATCH 07/15] Add configurable index key for redis. --- scripts/config.py | 1 + scripts/memory/redismem.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/config.py b/scripts/config.py index 8c582a157242..637c17fdf8bf 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -65,6 +65,7 @@ def __init__(self): self.redis_port = os.getenv("REDIS_PORT") self.redis_password = os.getenv("REDIS_PASSWORD") self.wipe_redis_on_start = os.getenv("WIPE_REDIS_ON_START", "True") == 'True' + self.memory_index = os.getenv("MEMORY_INDEX", 'gpt') # Note that indexes must be created on db 0 in redis, this is not configureable. 
self.memory_backend = os.getenv("MEMORY_BACKEND", 'pinecone') diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py index e7021066fa1a..20be4a4e8450 100644 --- a/scripts/memory/redismem.py +++ b/scripts/memory/redismem.py @@ -44,19 +44,20 @@ def __init__(self, cfg): password=redis_password, db=0 # Cannot be changed ) + self.cfg = cfg if cfg.wipe_redis_on_start: self.redis.flushall() try: - self.redis.ft("gpt").create_index( + self.redis.ft(f"{cfg.memory_index}").create_index( fields=SCHEMA, definition=IndexDefinition( - prefix=["gpt:"], + prefix=[f"{cfg.memory_index}:"], index_type=IndexType.HASH ) ) except Exception as e: print("Error creating Redis search index: ", e) - existing_vec_num = self.redis.get('vec_num') + existing_vec_num = self.redis.get(f'{cfg.memory_index}-vec_num') self.vec_num = int(existing_vec_num.decode('utf-8')) if\ existing_vec_num else 0 @@ -76,11 +77,11 @@ def add(self, data: str) -> str: "embedding": vector } pipe = self.redis.pipeline() - pipe.hset(f"gpt:{self.vec_num}", mapping=data_dict) + pipe.hset(f"{self.cfg.memory_index}:{self.vec_num}", mapping=data_dict) _text = f"Inserting data into memory at index: {self.vec_num}:\n"\ f"data: {data}" self.vec_num += 1 - pipe.set('vec_num', self.vec_num) + pipe.set(f'{self.cfg.memory_index}-vec_num', self.vec_num) pipe.execute() return _text @@ -126,7 +127,7 @@ def get_relevant( query_vector = np.array(query_embedding).astype(np.float32).tobytes() try: - results = self.redis.ft("gpt").search( + results = self.redis.ft(f"{self.cfg.memory_index}").search( query, query_params={"vector": query_vector} ) except Exception as e: @@ -138,4 +139,4 @@ def get_stats(self): """ Returns: The stats of the memory index. """ - return self.redis.ft("mem").info() + return self.redis.ft(f"{self.cfg.memory_index}").info() From ea6b97050948487cee5ee50a12f7eb2a161e0648 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 15:28:48 -0500 Subject: [PATCH 08/15] Update README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index b7f514e8306e..5ce001b0944a 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,12 @@ WIPE_REDIS_ON_START=False To persist memory stored in Redis. +You can specify the memory index for redis using the following: + +```` +MEMORY_INDEX=whatever +```` + ## 🌲 Pinecone API Key Setup Pinecone enable a vector based memory so a vast memory can be stored and only relevant memories From cb14c8d999c32c89215be04d27fe132a149eb047 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 18:13:18 -0500 Subject: [PATCH 09/15] Implement local memory. 
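The local backend stores each text next to its ada-002 embedding in a numpy matrix, persists both to a `<MEMORY_INDEX>.json` file via orjson, and answers `get_relevant` with a single matrix-vector product; since ada-002 embeddings are unit-normalised, the dot product ranks stored texts by cosine similarity. A minimal sketch of the scoring step (illustrative names, not the exact code in the diff below):

```python
import numpy as np

def top_k(query_embedding, embeddings: np.ndarray, texts: list, k: int = 5) -> list:
    # Dot product against every stored embedding; ada-002 vectors are
    # unit-normalised, so this ranks by cosine similarity.
    scores = np.dot(embeddings, np.array(query_embedding, dtype=np.float32))
    # Indices of the k highest scores, best match first.
    top = np.argsort(scores)[-k:][::-1]
    return [texts[i] for i in top]
```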
--- requirements.txt | 3 +- scripts/commands.py | 8 ++- scripts/config.py | 4 +- scripts/main.py | 5 +- scripts/memory/local.py | 111 +++++++++++++++++++++++++++++++++++++ scripts/memory/redismem.py | 1 - 6 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 scripts/memory/local.py diff --git a/requirements.txt b/requirements.txt index 9cfddad62778..5bcc74957ec5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ docker duckduckgo-search google-api-python-client #(https://developers.google.com/custom-search/v1/overview) pinecone-client==2.2.1 -redis \ No newline at end of file +redis +orjson \ No newline at end of file diff --git a/scripts/commands.py b/scripts/commands.py index 98be77727bc9..a88ad0ae09ab 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -1,5 +1,6 @@ import browse import json +from memory.local import LocalCache from memory.pinecone import PineconeMemory from memory.redismem import RedisMemory import datetime @@ -55,11 +56,14 @@ def get_command(response): def execute_command(command_name, arguments): if cfg.memory_backend == "pinecone": memory = PineconeMemory(cfg=cfg) - else: + elif cfg.memory_backend == "redis": memory = RedisMemory(cfg=cfg) + else: + memory = LocalCache(cfg=cfg) + try: if command_name == "google": - + # Check if the Google API key is set and use the official search method # If the API key is not set or has only whitespaces, use the unofficial search method if cfg.google_api_key and (cfg.google_api_key.strip() if cfg.google_api_key else None): diff --git a/scripts/config.py b/scripts/config.py index 637c17fdf8bf..9afeb1d25727 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -65,10 +65,10 @@ def __init__(self): self.redis_port = os.getenv("REDIS_PORT") self.redis_password = os.getenv("REDIS_PASSWORD") self.wipe_redis_on_start = os.getenv("WIPE_REDIS_ON_START", "True") == 'True' - self.memory_index = os.getenv("MEMORY_INDEX", 'gpt') + self.memory_index = os.getenv("MEMORY_INDEX", 'auto-gpt') # Note that indexes must be created on db 0 in redis, this is not configureable. 
- self.memory_backend = os.getenv("MEMORY_BACKEND", 'pinecone') + self.memory_backend = os.getenv("MEMORY_BACKEND", 'local') # Initialize the OpenAI API client openai.api_key = self.openai_api_key diff --git a/scripts/main.py b/scripts/main.py index eecdd7f80ee3..e49f1810d256 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -1,6 +1,7 @@ import json import random import commands as cmd +from memory.local import LocalCache from memory.pinecone import PineconeMemory from memory.redismem import RedisMemory import data @@ -287,8 +288,10 @@ def parse_arguments(): if cfg.memory_backend == "pinecone": memory = PineconeMemory(cfg) memory.clear() -else: +elif cfg.memory_backend == "redis": memory = RedisMemory(cfg) +else: + memory = LocalCache(cfg) print('Using memory of type: ' + memory.__class__.__name__) diff --git a/scripts/memory/local.py b/scripts/memory/local.py new file mode 100644 index 000000000000..fb10522426cf --- /dev/null +++ b/scripts/memory/local.py @@ -0,0 +1,111 @@ +import dataclasses +import orjson +from typing import Any, List, Optional +import numpy as np +import os +from memory.base import MemoryProviderSingleton, get_ada_embedding + + +EMBED_DIM = 1536 +SAVE_OPTIONS = orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS + + +def create_default_embeddings(): + return np.zeros((0, EMBED_DIM)).astype(np.float32) + + +@dataclasses.dataclass +class CacheContent: + texts: List[str] = dataclasses.field(default_factory=list) + embeddings: np.ndarray = dataclasses.field( + default_factory=create_default_embeddings + ) + + +class LocalCache(MemoryProviderSingleton): + + # on load, load our database + def __init__(self, cfg) -> None: + self.filename = f"{cfg.memory_index}.json" + if os.path.exists(self.filename): + with open(self.filename, 'rb') as f: + loaded = orjson.loads(f.read()) + self.data = CacheContent(**loaded) + else: + self.data = CacheContent() + + def add(self, text: str): + """ + Add text to our list of texts, add embedding as row to our + embeddings-matrix + + Args: + text: str + + Returns: None + """ + self.data.texts.append(text) + + embedding = get_ada_embedding(text) + + vector = np.array(embedding).astype(np.float32) + vector = vector[np.newaxis, :] + self.data.embeddings = np.concatenate( + [ + vector, + self.data.embeddings, + ], + axis=0, + ) + + with open(self.filename, 'wb') as f: + out = orjson.dumps( + self.data, + option=SAVE_OPTIONS + ) + f.write(out) + + def clear(self) -> str: + """ + Clears the redis server. + + Returns: A message indicating that the memory has been cleared. + """ + self.data = CacheContent() + return "Obliviated" + + def get(self, data: str) -> Optional[List[Any]]: + """ + Gets the data from the memory that is most relevant to the given data. + + Args: + data: The data to compare to. + + Returns: The most relevant data. + """ + return self.get_relevant(data, 1) + + def get_relevant(self, text: str, k: int) -> List[Any]: + """" + matrix-vector mult to find score-for-each-row-of-matrix + get indices for top-k winning scores + return texts for those indices + Args: + text: str + k: int + + Returns: List[str] + """ + embedding = get_ada_embedding(text) + + scores = np.dot(self.data.embeddings, embedding) + + top_k_indices = np.argsort(scores)[-k:][::-1] + + return [self.data.texts[i] for i in top_k_indices] + + def get_stats(self): + """ + Returns: The stats of the local cache. 
+ """ + return len(self.data.texts), self.data.embeddings.shape diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py index 20be4a4e8450..296d0cce2c75 100644 --- a/scripts/memory/redismem.py +++ b/scripts/memory/redismem.py @@ -4,7 +4,6 @@ from redis.commands.search.field import VectorField, TextField from redis.commands.search.query import Query from redis.commands.search.indexDefinition import IndexDefinition, IndexType -import traceback import numpy as np from memory.base import MemoryProviderSingleton, get_ada_embedding From 503b58b7948fe3a37622919864015607352e76e6 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 18:30:04 -0500 Subject: [PATCH 10/15] Refactor memory into factory. --- scripts/commands.py | 11 ++-------- scripts/main.py | 12 ++--------- scripts/memory/__init__.py | 42 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/scripts/commands.py b/scripts/commands.py index a88ad0ae09ab..783e6bd2950a 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -1,8 +1,6 @@ import browse import json -from memory.local import LocalCache -from memory.pinecone import PineconeMemory -from memory.redismem import RedisMemory +from memory import get_memory import datetime import agent_manager as agents import speak @@ -54,12 +52,7 @@ def get_command(response): def execute_command(command_name, arguments): - if cfg.memory_backend == "pinecone": - memory = PineconeMemory(cfg=cfg) - elif cfg.memory_backend == "redis": - memory = RedisMemory(cfg=cfg) - else: - memory = LocalCache(cfg=cfg) + memory = get_memory(cfg) try: if command_name == "google": diff --git a/scripts/main.py b/scripts/main.py index e49f1810d256..11bf0dc1b23a 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -1,9 +1,7 @@ import json import random import commands as cmd -from memory.local import LocalCache -from memory.pinecone import PineconeMemory -from memory.redismem import RedisMemory +from memory import get_memory import data import chat from colorama import Fore, Style @@ -285,13 +283,7 @@ def parse_arguments(): # Initialize memory and make sure it is empty. # this is particularly important for indexing and referencing pinecone memory -if cfg.memory_backend == "pinecone": - memory = PineconeMemory(cfg) - memory.clear() -elif cfg.memory_backend == "redis": - memory = RedisMemory(cfg) -else: - memory = LocalCache(cfg) +memory = get_memory(cfg, init=True) print('Using memory of type: ' + memory.__class__.__name__) diff --git a/scripts/memory/__init__.py b/scripts/memory/__init__.py index e69de29bb2d1..dacb05b3286c 100644 --- a/scripts/memory/__init__.py +++ b/scripts/memory/__init__.py @@ -0,0 +1,42 @@ +from memory.local import LocalCache +try: + from memory.redismem import RedisMemory +except ImportError: + print("Redis not installed. Skipping import.") + RedisMemory = None + +try: + from memory.pinecone import PineconeMemory +except ImportError: + print("Pinecone not installed. Skipping import.") + PineconeMemory = None + + +def get_memory(cfg, init=False): + memory = None + if cfg.memory_backend == "pinecone": + if not PineconeMemory: + print("Error: Pinecone is not installed. Please install pinecone" + " to use Pinecone as a memory backend.") + else: + memory = PineconeMemory(cfg) + if init: + memory.clear() + elif cfg.memory_backend == "redis": + if not RedisMemory: + print("Error: Redis is not installed. 
Please install redis-py to" + " use Redis as a memory backend.") + else: + memory = RedisMemory(cfg) + + if memory is None: + memory = LocalCache(cfg) + return memory + + +__all__ = [ + "get_memory", + "LocalCache", + "RedisCache", + "PineconeCache", +] From a34c51bf8622cf83a34493718c8be60c0676e603 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Fri, 7 Apr 2023 20:58:00 -0500 Subject: [PATCH 11/15] Update scripts/config.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jason Kölker --- scripts/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/config.py b/scripts/config.py index 9afeb1d25727..1601dcc43fd7 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -61,9 +61,9 @@ def __init__(self): # User agent headers to use when browsing web # Some websites might just completely deny request with an error code if no user agent was found. self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} - self.redis_host = os.getenv("REDIS_HOST") - self.redis_port = os.getenv("REDIS_PORT") - self.redis_password = os.getenv("REDIS_PASSWORD") + self.redis_host = os.getenv("REDIS_HOST", "localhost") + self.redis_port = os.getenv("REDIS_PORT", "6379") + self.redis_password = os.getenv("REDIS_PASSWORD", "") self.wipe_redis_on_start = os.getenv("WIPE_REDIS_ON_START", "True") == 'True' self.memory_index = os.getenv("MEMORY_INDEX", 'auto-gpt') # Note that indexes must be created on db 0 in redis, this is not configureable. From d1777e39a8668674d40b06f3e3690e68e5daa27d Mon Sep 17 00:00:00 2001 From: Toran Bruce Richards Date: Sun, 9 Apr 2023 02:31:51 +0100 Subject: [PATCH 12/15] Fixes incorrect class names in __all__ Changes "Cache" to "Memory". --- scripts/memory/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/memory/__init__.py b/scripts/memory/__init__.py index dacb05b3286c..99523079434e 100644 --- a/scripts/memory/__init__.py +++ b/scripts/memory/__init__.py @@ -37,6 +37,6 @@ def get_memory(cfg, init=False): __all__ = [ "get_memory", "LocalCache", - "RedisCache", - "PineconeCache", + "RedisMemory", + "PineconeMemory", ] From 2db7f0815eed6e96b94423c96a40b95fc47750d4 Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Sat, 8 Apr 2023 22:25:59 -0500 Subject: [PATCH 13/15] Update main.py Remove pinecone config requirement --- scripts/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/main.py b/scripts/main.py index e16fb9d14df9..10f9d0dcaa0b 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -281,8 +281,6 @@ def parse_arguments(): # Make a constant: user_input = "Determine which next command to use, and respond using the format specified above:" -# raise an exception if pinecone_api_key or region is not provided -if not cfg.pinecone_api_key or not cfg.pinecone_region: raise Exception("Please provide pinecone_api_key and pinecone_region") # Initialize memory and make sure it is empty. 
# this is particularly important for indexing and referencing pinecone memory memory = get_memory(cfg, init=True) From 9e139fb314b7b5c9b538a85d204ff08ce59e10bd Mon Sep 17 00:00:00 2001 From: Toran Bruce Richards Date: Sun, 9 Apr 2023 05:22:03 +0100 Subject: [PATCH 14/15] Wipe local memory on load --- scripts/memory/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/memory/__init__.py b/scripts/memory/__init__.py index 99523079434e..a441a46aa94e 100644 --- a/scripts/memory/__init__.py +++ b/scripts/memory/__init__.py @@ -31,6 +31,8 @@ def get_memory(cfg, init=False): if memory is None: memory = LocalCache(cfg) + if init: + memory.clear() return memory From a861dec6764254b581d23d4573f1da8307bf533a Mon Sep 17 00:00:00 2001 From: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com> Date: Sat, 8 Apr 2023 23:33:18 -0500 Subject: [PATCH 15/15] Memory fixes. --- scripts/memory/local.py | 3 +++ scripts/memory/redismem.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/memory/local.py b/scripts/memory/local.py index fb10522426cf..8dc90021ff6e 100644 --- a/scripts/memory/local.py +++ b/scripts/memory/local.py @@ -44,6 +44,8 @@ def add(self, text: str): Returns: None """ + if 'Command Error:' in text: + return "" self.data.texts.append(text) embedding = get_ada_embedding(text) @@ -64,6 +66,7 @@ def add(self, text: str): option=SAVE_OPTIONS ) f.write(out) + return text def clear(self) -> str: """ diff --git a/scripts/memory/redismem.py b/scripts/memory/redismem.py index 296d0cce2c75..2082fe588764 100644 --- a/scripts/memory/redismem.py +++ b/scripts/memory/redismem.py @@ -69,6 +69,8 @@ def add(self, data: str) -> str: Returns: Message indicating that the data has been added. """ + if 'Command Error:' in data: + return "" vector = get_ada_embedding(data) vector = np.array(vector).astype(np.float32).tobytes() data_dict = { @@ -132,7 +134,7 @@ def get_relevant( except Exception as e: print("Error calling Redis search: ", e) return None - return list(results.docs) + return [result.data for result in results.docs] def get_stats(self): """
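Taken together, the series hides backend selection behind the `get_memory` factory: Pinecone and Redis are used when configured and importable, and the orjson-backed local cache is the fallback (and the default `MEMORY_BACKEND`). A minimal usage sketch of the resulting API, assuming the `scripts` directory is on the import path and the environment variables described above are set; the sample strings are illustrative only:

```python
from config import Config
from memory import get_memory

cfg = Config()  # Singleton; reads MEMORY_BACKEND, MEMORY_INDEX, REDIS_* and PINECONE_* settings from the environment

# init=True clears Pinecone or local memory on startup; the Redis backend is
# wiped according to WIPE_REDIS_ON_START instead.
memory = get_memory(cfg, init=True)
print('Using memory of type: ' + memory.__class__.__name__)

memory.add("The user prefers short, sourced answers.")
print(memory.get_relevant("How should answers be written?", 5))
print(memory.get_stats())
```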