Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds risk avoidance mode and relevant config. #934

Closed
wants to merge 33 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
58cad4d
env template
Apr 17, 2023
d7c072f
relevant changes to config
Apr 18, 2023
3ed068f
Merge branch 'master' into riskMode
Apr 18, 2023
3002849
argument parse: risk_avoidance
Apr 18, 2023
3be189b
argument config: risk avoidance mode
Apr 18, 2023
ecf4f83
argument config: risk avoidance mode fixed to new changes
Apr 18, 2023
1f04a83
agent.py changed
Apr 18, 2023
f59a9f9
risk_evaluation.py
Apr 18, 2023
22dd6e5
comment removed
Apr 18, 2023
f2fc9d4
lint
Apr 18, 2023
5d0ca0c
black formatting
Apr 18, 2023
68b196c
isort formatting
Apr 18, 2023
bda56b2
Merge branch 'master' into riskMode
Apr 18, 2023
306527e
changes to cli
Apr 18, 2023
a732e7c
changes to configurator
Apr 18, 2023
223189f
fixing
Apr 18, 2023
9759df7
Merge pull request #2 from jnt0rrente/riskMode
jnt0rrente Apr 18, 2023
7aa969d
Merge branch 'Significant-Gravitas:master' into master
jnt0rrente Apr 18, 2023
271128b
Merge branch 'master' into master
jnt0rrente Apr 19, 2023
0e875e8
AutoGpt.json blanked out
Apr 20, 2023
191853f
RA flag kebab'd
Apr 20, 2023
a344a45
Merge branch 'master' into riskMode
Apr 20, 2023
6af2c14
patching for what I believe is a bug in correct_json
Apr 20, 2023
a4af736
corrected .env.template additions
Apr 20, 2023
91f10f8
Merge pull request #3 from jnt0rrente/riskMode
jnt0rrente Apr 20, 2023
40a7a69
Merge branch 'master' into merging-1
jnt0rrente Apr 20, 2023
57b2763
black formatting
Apr 20, 2023
0466adc
Merge branch 'master' into merging-1
jnt0rrente Apr 24, 2023
c4ed684
Delete AutoGpt.json
ntindle May 20, 2023
29659d0
Merge branch 'master' into merging-1
ntindle May 20, 2023
b58df53
Merge branch 'master' into merging-1
ntindle May 21, 2023
bee161c
Merge branch 'master' into merging-1
ntindle Jun 7, 2023
d8e98a5
Merge branch 'master' into merging-1
Pwuts Jun 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,13 @@ OPENAI_API_KEY=your-openai-api-key

## SMART_LLM_MODEL - Smart language model (Default: gpt-4)
## FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo)
## RISK_EVALUATION_MODEL - Risk evaluation model (Default: gpt-4)
# SMART_LLM_MODEL=gpt-4
# FAST_LLM_MODEL=gpt-3.5-turbo
# RISK_EVALUATION_MODEL=gpt-4
Pwuts marked this conversation as resolved.
Show resolved Hide resolved

### RISK ASSESSMENT
# RISK_THRESHOLD=0.5

### EMBEDDINGS
## EMBEDDING_MODEL - Model to use for creating embeddings
Expand Down
64 changes: 62 additions & 2 deletions autogpt/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
LogCycleHandler,
)
from autogpt.logs import logger, print_assistant_thoughts
from autogpt.risk_evaluation import evaluate_risk
from autogpt.memory.message_history import MessageHistory
from autogpt.memory.vector import VectorMemory
from autogpt.speech import say_text
Expand Down Expand Up @@ -177,8 +178,11 @@ def signal_handler(signum, frame):
f"ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}",
)

if not cfg.continuous_mode and self.next_action_count == 0:
# ### GET USER AUTHORIZATION TO EXECUTE COMMAND ###
if (
not cfg.continuous_mode
and self.next_action_count == 0
and not cfg.risk_avoidance_mode
): # default mode
# Get key press: Prompt the user to press enter to continue or escape
# to exit
self.user_input = ""
Expand Down Expand Up @@ -255,6 +259,62 @@ def signal_handler(signum, frame):
elif user_input == "EXIT":
logger.info("Exiting...")
break
if cfg.risk_avoidance_mode:
logger.typewriter_log(
"NEXT ACTION: ",
Fore.CYAN,
f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} "
f"ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}",
)

risk_value, risk_reason = evaluate_risk(command_name, arguments)

if risk_value > cfg.risk_threshold:
logger.typewriter_log(
"Risk evaluation: ",
Fore.RED,
f"Command not authorised. Calculated risk value: {Fore.RED}{risk_value}{Style.RESET_ALL},"
f" reason: {Fore.RED}{risk_reason}{Style.RESET_ALL}",
)

while True:
logger.typewriter_log(
"Awaiting user authorisation (y/n)...", Fore.RED
)
console_input = clean_input(
Fore.MAGENTA + "Input: " + Style.RESET_ALL
)
if console_input.lower().strip() == "y":
user_input = "GENERATE NEXT COMMAND JSON"
break
elif console_input.lower().strip() == "":
print("Invalid input format.")
continue
elif console_input.lower() == "n":
user_input = "EXIT"
break
else:
user_input = console_input
command_name = "human_feedback"
break

if user_input == "GENERATE NEXT COMMAND JSON":
logger.typewriter_log(
"-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=",
Fore.MAGENTA,
"",
)
elif user_input == "EXIT":
print("Exiting...", flush=True)
break
else:
logger.typewriter_log(
"Risk evaluation: ",
Fore.GREEN,
f"Command authorised. Calculated risk value: {Fore.GREEN}{risk_value}{Style.RESET_ALL},"
f" reason: {Fore.GREEN}{risk_reason}{Style.RESET_ALL}",
)
user_input = "GENERATE NEXT COMMAND JSON"
else:
# First log new-line so user can differentiate sections better in console
logger.typewriter_log("\n")
Expand Down
3 changes: 3 additions & 0 deletions autogpt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

@click.group(invoke_without_command=True)
@click.option("-c", "--continuous", is_flag=True, help="Enable Continuous Mode")
@click.option("--risk-avoidance", is_flag=True, help="Enable Risk Avoidance Mode")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

risk-avoidance is technically accurate, although it doesn't reflect how this mode works, which is more of a supervisory workflow. Maybe something like --self-supervise would reflect it better. Or an option that combines risk avoidance and self-feedback:

  • --self-supervise=none (default)
  • --self-supervise=guidance for the existing self-feedback mode
  • --self-supervise=risk-averse --max-risk=0.3

What do you think? cc @ntindle

Copy link
Member

@ntindle ntindle Jun 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think pwuts’s suggestion is a really good way to handle it, especially in light of the work we are planning for guardrails

@click.option(
"--skip-reprompt",
"-y",
Expand Down Expand Up @@ -70,6 +71,7 @@ def main(
ctx: click.Context,
continuous: bool,
continuous_limit: int,
risk_avoidance: bool,
ai_settings: str,
prompt_settings: str,
skip_reprompt: bool,
Expand All @@ -96,6 +98,7 @@ def main(
run_auto_gpt(
continuous,
continuous_limit,
risk_avoidance,
ai_settings,
prompt_settings,
skip_reprompt,
Expand Down
9 changes: 9 additions & 0 deletions autogpt/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self) -> None:

self.debug_mode = False
self.continuous_mode = False
self.risk_avoidance_mode = False
self.continuous_limit = 0
self.speak_mode = False
self.skip_reprompt = False
Expand Down Expand Up @@ -56,6 +57,10 @@ def __init__(self) -> None:
)
self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo")
self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")

self.risk_evaluation_model = os.getenv("RISK_EVALUATION_MODEL", "gpt-4")
self.risk_threshold = float(os.getenv("RISK_THRESHOLD", 0.5))

self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002")

self.browse_spacy_language_model = os.getenv(
Expand Down Expand Up @@ -248,6 +253,10 @@ def set_debug_mode(self, value: bool) -> None:
"""Set the debug mode value."""
self.debug_mode = value

def set_risk_avoidance_mode(self, value: bool):
"""Set the risk avoidance mode value."""
self.risk_avoidance_mode = value

def set_plugins(self, value: list) -> None:
"""Set the plugins value."""
self.plugins = value
Expand Down
18 changes: 18 additions & 0 deletions autogpt/configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def create_config(
config: Config,
continuous: bool,
continuous_limit: int,
risk_avoidance: bool,
ai_settings_file: str,
prompt_settings_file: str,
skip_reprompt: bool,
Expand Down Expand Up @@ -52,8 +53,12 @@ def create_config(
skips_news (bool): Whether to suppress the output of latest news on startup
"""
config.set_debug_mode(False)
config.set_risk_avoidance_mode(False)
config.set_continuous_mode(False)
config.set_speak_mode(False)
config.set_fast_llm_model(check_model(CFG.fast_llm_model, "fast_llm_model"))
config.set_smart_llm_model(check_model(CFG.smart_llm_model, "smart_llm_model"))


if debug:
logger.typewriter_log("Debug Mode: ", Fore.GREEN, "ENABLED")
Expand All @@ -76,6 +81,19 @@ def create_config(
)
config.set_continuous_limit(continuous_limit)

elif risk_avoidance:
logger.typewriter_log("Risk Avoidance Mode: ", Fore.RED, "ENABLED")
logger.typewriter_log(
"Risk evaluation settings: ",
Fore.RED,
f"Model: {CFG.risk_evaluation_model}, Threshold: {CFG.risk_threshold}",
)
logger.typewriter_log(
"WARNING: ",
Fore.RED,
"Risk Avoidance mode is expected to be safer than continuous mode, but it is still potentially dangerous and may cause your AI to run forever or carry out actions you would not usually authorise. Exercise proper caution.",
)
CFG.set_risk_avoidance_mode(True)
# Check if continuous limit is used without continuous mode
if continuous_limit and not continuous:
raise click.UsageError("--continuous-limit can only be used with --continuous")
Expand Down
57 changes: 57 additions & 0 deletions autogpt/risk_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json

from autogpt.chat import ( # TODO this shouldnt really be in chat.py, should it?
create_chat_message,
)
from autogpt.config import Config
from autogpt.json_utils.json_fix_general import correct_json
from autogpt.llm_utils import create_chat_completion

# Module-level config singleton; read at call time for debug_mode,
# risk_evaluation_model and (by callers) risk_threshold.
cfg = Config()

# Example of the JSON object the evaluator model is asked to return.
# NOTE(review): this constant is not referenced anywhere in this module —
# the schema is restated inline in risk_evaluator_prompt below. Confirm it
# is used elsewhere, or remove it.
JSON_SCHEMA = """
{
"calculated_risk": 0.5,
"reason": "reason"
}
"""

# System prompt that primes the model to act as a risk evaluator and to
# reply with a single JSON object: {"calculated_risk": <0..1>, "reason": "..."}.
risk_evaluator_prompt = """You are to act as an intelligent, automated expert risk evaluator. You will be given input in the form "{command: <command name>, arguments: <argument list>}" and you must evaluate the possible risks of an ideal automaton running that command. Think of risks against life, human or otherwise; the environment, society and the global economy.

Your answers must be in the form `{"calculated_risk": <value between 0 and 1>, "reason": "<your reasons for calculating the value>"}`. Using the right syntax is vital to your task. Mind the curly brackets, the colon, the comma and the space. If you do not use the right syntax, you will be penalized.

Respond with "Acknowledged." if you fully understand and agree to the above."""


def evaluate_risk(command, arguments):
    """Score the risk of executing *command* with *arguments* via an LLM.

    Builds a short conversation priming the configured risk-evaluation
    model, sends the command/arguments pair, and parses the model's JSON
    reply (after repairing it with ``correct_json``).

    Args:
        command: Name of the command the agent wants to execute.
        arguments: Arguments the command would be invoked with.

    Returns:
        tuple: ``(calculated_risk, reason)`` where ``calculated_risk`` is
        the model-reported value between 0 and 1 and ``reason`` is the
        model's justification string.

    Raises:
        json.JSONDecodeError: If the (repaired) model reply is not valid JSON.
        KeyError: If the reply lacks the expected ``calculated_risk`` or
            ``reason`` keys.
    """
    context = [
        create_chat_message("system", risk_evaluator_prompt),
        # Pre-seeded acknowledgement so the next user turn is the command.
        create_chat_message("assistant", "Acknowledged."),
        create_chat_message("user", f"{{command: {command}, arguments: {arguments}}}"),
    ]

    if cfg.debug_mode:
        # Single guard for both debug lines (the original checked twice).
        print(f"Evaluating command {command} with arguments {arguments}. ")
        print(f"Context: {context}")

    response = create_chat_completion(
        model=cfg.risk_evaluation_model,
        messages=context,
        temperature=0,  # Deterministic output keeps risk scores reproducible.
        max_tokens=2500,  # More than enough for this task; TODO: make configurable?
    )

    # correct_json repairs common model JSON mistakes before parsing;
    # it does not validate the schema, hence the KeyError possibility above.
    response_object = json.loads(correct_json(response))

    if cfg.debug_mode:
        print(f"Risk evaluator response object: {response_object}")

    return response_object["calculated_risk"], response_object["reason"]