diff --git a/README.md b/README.md
index 9198cd8..bff8d4e 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
- 
+ .
diff --git a/gpt_computer_agent/__init__.py b/gpt_computer_agent/__init__.py
index 8a8d34f..9cc84d8 100644
--- a/gpt_computer_agent/__init__.py
+++ b/gpt_computer_agent/__init__.py
@@ -1,7 +1,288 @@
-from .start import start
-from .agentic import Agent
-from .tooler import Tool
-__version__ = '0.23.0'  # fmt: skip
+try:
+    from .start import start
+    from .agentic import Agent
+    from .tooler import Tool
+except:
+    pass
+
+__version__ = '0.28.3'  # fmt: skip
+
+
+from .classes import BaseClass, BaseVerifier, TypeVerifier, Task
+
+import os
+import time
+import subprocess
+import requests
+
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+
+
+class instance:
+    def __init__(self, url, tasks=None):
+        self.url = url
+        self.task = []
+        # Guard against the default tasks=None before iterating.
+        if tasks:
+            for t in tasks:
+                self.add_task(t)
+
+    def request(self):
+        pass
+
+    def add_task(self, task):
+        if isinstance(task, list):
+            for t in task:
+                self.task.append(t)
+        else:
+            self.task.append(task)
+
+        for t in self.task:
+            t.add_client(self)
+
+    def kick(self):
+        # Run every queued task, then collect their results in order.
+        for t in self.task:
+            t.run()
+
+        results = []
+        for t in self.task:
+            results.append(t.result)
+
+        return results
+
+    def run(self, task):
+        task.add_client(self)
+        task.run()
+        return task.result
+
+    def user_id(self):
+        from .utils.user_id import load_user_id
+        return load_user_id()
+
+
+class interface:
+    pass
+
+
+class local_instance(instance):
+    def __init__(self, *args, **kwargs):
+        super().__init__("http://localhost:7541", *args, **kwargs)
+        from .remote import Remote_Client
+
+        self.client = Remote_Client(self.url)
+
+    def request(self, the_request, the_response, screen=False):
+        return self.client.request(the_request, the_response, screen)
+
+    def start(self):
+        command = "python -c 'from gpt_computer_agent import start; start(True);'"
+        self.process = subprocess.Popen(command, shell=True)
+
+    def close(self):
+        try:
+            self.client.stop_server()
+        except:
+            pass
+
+        self.process.terminate()
+        self.process.wait()
+
+    def client_status(self):
+        return self.client.status
+
+
+class local(interface):
+
+    @staticmethod
+    def agent(*args, **kwargs):
+        the_instance = local_instance(*args, **kwargs)
+        the_instance.start()
+
+        time.sleep(5)
+
+        client_status = the_instance.client_status()
+
+        if not client_status:
+            raise Exception("Failed to start the local instance")
+
+        return the_instance
+
+
+class cloud_instance(instance):
+    def __init__(self, *args, **kwargs):
+        super().__init__("https://free_cloud_1.gca.khulnasoft.com/", *args, **kwargs)
+
+    def request(self, the_request, the_response, screen=False):
+        screen = "false" if not screen else "true"
+
+        response = requests.post(self.url+"request", data={"request": the_request, "response": the_response, "screen": screen, "instance": self.instance_id}, verify=True)
+        json_response = response.json()
+        request_id = json_response["request_id"]
+        try:
+            # Poll until the remote instance reports a finished result.
+            while True:
+                response = requests.post(self.url+"request_result", data={"request_id": request_id}, verify=True)
+                the_json = response.json()
+                if the_json["status"] == True:
+                    return the_json["result"]
+                time.sleep(1)
+        except:
+            return response.text
+
+    def change_profile(self, profile):
+        response = requests.post(self.url+"change_profile", data={"profile": profile, "instance": self.instance_id}, verify=True)
+        the_json = response.json()
+        return the_json["result"]
+
+    def add_system_message(self, system_message):
+        response = requests.post(self.url+"add_system_message",
data={"system_message": system_message, "instance":self.instance_id}, verify=True) + the_json = response.json() + return the_json["result"] + + + def add_user_id(self, user_id): + response = requests.post(self.url+"add_user_id", data={"user_id": user_id, "instance":self.instance_id}, verify=True) + the_json = response.json() + return the_json["result"] + + def get_logs(self): + response = requests.post(self.url+"get_logs", data={"instance":self.instance_id}, verify=True) + the_json = response.json() + return the_json["result"] + + def reset_memory(self): + response = requests.post(self.url+"reset_memory", data={"instance":self.instance_id}, verify=True) + the_json = response.json() + return the_json["result"] + + def screenshot(self): + response = requests.post(self.url+"screenshot_instance", data={"instance":self.instance_id}, verify=True) + + its_an_error = False + + try: + the_json = response.json() + if "result" in the_json: + its_an_error = True + except: + pass + + + if not its_an_error: + with open('current_screenshot.png', 'wb') as file: + file.write(response.content) + import matplotlib.pyplot as plt + import matplotlib.image as mpimg + + img = mpimg.imread('current_screenshot.png') + plt.imshow(img) + plt.axis('off') + plt.show() + + + + + + def start(self): + req = requests.get(self.url+"start_instance", verify=True) + the_json = req.json() + + self.instance_id = the_json["result"] + self.add_user_id(self.user_id()) + + + + def close(self): + req = requests.post(self.url+"stop_instance", data={"instance": self.instance_id}, verify=True) + the_json = req.json() + return the_json["result"] + + def client_status(self): + return True + + + +class Cloud(interface): + + @staticmethod + def agent(*args, **kwargs): + start_time = time.time() + + the_instance = cloud_instance( *args, **kwargs) + the_instance.start() + time.sleep(1) + + end_time = time.time() + + print(f"Time to start the instance: {end_time - start_time}") + + return the_instance + + + + + + + +class docker_instance(instance): + def __init__(self, url, *args, **kwargs): + super().__init__(url, *args, **kwargs) + from .remote import Remote_Client + + self.client = Remote_Client(self.url) + + def request(self, the_request, the_response, screen=False): + + return self.client.request(the_request, the_response, screen) + + + def start(self): + pass + + + def close(self): + pass + + + + def client_status(self): + return self.client.status + + + + +class docker(interface): + + @staticmethod + def agent(url, *args, **kwargs): + the_instance = docker_instance(url, *args, **kwargs) + the_instance.start() + + + client_status = the_instance.client_status() + + if not client_status: + raise Exception("Failed to start the docker instance") + + return the_instance + \ No newline at end of file diff --git a/gpt_computer_agent/agent/agent.py b/gpt_computer_agent/agent/agent.py index 2bcf61c..df09c83 100644 --- a/gpt_computer_agent/agent/agent.py +++ b/gpt_computer_agent/agent/agent.py @@ -4,19 +4,26 @@ from ..llm_settings import llm_settings from ..tooler import * from ..display_tools import * + from ..cu.computer import * from ..teams import * from .agent_tools import get_tools + from ..mcp.tool import mcp_tools + from ..standard_tools import get_standard_tools + except ImportError: from llm import get_model from utils.db import * from llm_settings import llm_settings from tooler import * from display_tools import * + from cu.computer import * from teams import * from agent.agent_tools import get_tools + from mcp.tool import mcp_tools + 
from standard_tools import get_standard_tools -from langgraph.prebuilt import chat_agent_executor +from langgraph.prebuilt import create_react_agent custom_tools_ = [] @@ -44,7 +51,7 @@ def get_prompt(name): return prompt -def get_agent_executor(): +def get_agent_executor(the_anthropic_model=False, no_tools=False): tools = get_tools() tools += custom_tools() @@ -58,23 +65,24 @@ def get_agent_executor(): except ImportError: pass - if llm_settings[model]["provider"] == "openai": - tools += [ - click_on_a_text_on_the_screen, - click_on_a_icon_on_the_screen, - move_on_a_text_on_the_screen, - move_on_a_icon_on_the_screen, - mouse_scroll, - ] - - tools += [get_texts_on_the_screen] - - if ( - llm_settings[model]["provider"] == "openai" - or llm_settings[model]["provider"] == "groq" - ): - return chat_agent_executor.create_tool_calling_executor(get_model(), tools) - - if llm_settings[model]["provider"] == "ollama": - print("Ollama tool len", len(tools)) - return chat_agent_executor.create_tool_calling_executor(get_model(), tools) + + if the_anthropic_model: + tools += [] + if load_aws_access_key_id() == "default": + model_catch = get_model(the_model="claude-3-5-sonnet-20241022") + else: + model_catch = get_model(the_model="us.anthropic.claude-3-5-sonnet-20241022-v2:0") + + print("Anthropic model catch", model_catch) + print("Anthropic tools len", len(tools)) + return create_react_agent(model_catch, tools) + else: + tools += [mouse_scroll, click_to_text, click_to_icon, click_to_area] + mcp_tools() + get_standard_tools() + + + + if no_tools: + tools = [] + + + return create_react_agent(get_model(), tools) \ No newline at end of file diff --git a/gpt_computer_agent/agent/agent_tools.py b/gpt_computer_agent/agent/agent_tools.py index 875da1d..54db8cf 100644 --- a/gpt_computer_agent/agent/agent_tools.py +++ b/gpt_computer_agent/agent/agent_tools.py @@ -3,7 +3,7 @@ from ..tooler import * from ..display_tools import * from ..teams import * - from ..llm_settings import each_message_extension, llm_settings + from ..llm_settings import llm_settings except ImportError: from utils.db import * @@ -49,16 +49,4 @@ def get_tiger_tools(): def get_tools(): - model = load_model_settings() - - if not llm_settings[model]["tools"]: - return [] - - if is_online_tools_setting_active(): - tools = get_tiger_tools() - if not tools: - tools = load_default_tools() - else: - tools = load_default_tools() - - return tools + return [] \ No newline at end of file diff --git a/gpt_computer_agent/agent/chat_history.py b/gpt_computer_agent/agent/chat_history.py index 1013e34..1277060 100644 --- a/gpt_computer_agent/agent/chat_history.py +++ b/gpt_computer_agent/agent/chat_history.py @@ -4,10 +4,12 @@ try: from ..utils.db import get_history_db from ..utils.db import load_model_settings, agents - from ..llm_settings import each_message_extension, llm_settings + from ..llm_settings import llm_settings + from ..utils.chat_history import ChatHistory except ImportError: from utils.db import get_history_db from utils.db import load_model_settings + from utils.chat_history import ChatHistory from llm_settings import llm_settings @@ -24,8 +26,4 @@ def get_chat_message_history(): def clear_chat_history(): - get_chat_message_history().clear() - - the_model = load_model_settings() - if llm_settings[the_model]["tools"]: - get_chat_message_history().add_message(llm_history_oiginal()[0]) + ChatHistory().clear_chat() \ No newline at end of file diff --git a/gpt_computer_agent/agent/process.py b/gpt_computer_agent/agent/process.py index 
fb59f48..9a81695 100644 --- a/gpt_computer_agent/agent/process.py +++ b/gpt_computer_agent/agent/process.py @@ -1,22 +1,26 @@ try: from ..llm import * - from .assistant import * + from .agent import * from .chat_history import * from ..audio.tts import text_to_speech from ..audio.stt import speech_to_text - from ..audio.record import audio_data + from ..gui.signal import signal_handler from ..utils.db import * from ..utils.telemetry import my_tracer, os_name + from ..utils.user_id import load_user_id + from ..version import get_version except ImportError: from llm import * - from agent.assistant import * + from agent.agent import * from agent.chat_history import * from audio.tts import text_to_speech from audio.stt import speech_to_text from gui.signal import signal_handler from utils.db import * + from utils.user_id import load_user_id from utils.telemetry import my_tracer, os_name + from version import get_version import threading @@ -53,7 +57,7 @@ def tts_if_you_can( if first_control or bypass_other_settings: response_path = text_to_speech(text) if status_edit: - signal_handler.assistant_response_ready.emit() + signal_handler.agent_response_ready.emit() def play_audio(): for each_r in response_path: @@ -71,7 +75,7 @@ def play_audio(): break time.sleep(0.1) if status_edit: - signal_handler.assistant_response_stopped.emit() + signal_handler.agent_response_stopped.emit() if not not_threaded: playback_thread = threading.Thread(target=play_audio) @@ -88,9 +92,11 @@ def process_audio(take_screenshot=True, take_system_audio=False, dont_save_image span.set_attribute("user_id", user_id) span.set_attribute("os_name", os_name_) try: + from ..audio.record import audio_data global audio_data, last_ai_response from ..gpt_computer_agent import the_input_box, the_main_window - from ..audio.record import audio_data, the_input_box_pre + from ..audio.record import audio_data + from ..audio.input_box import the_input_box_pre transcription = speech_to_text(mic_record_location) @@ -120,9 +126,8 @@ def process_audio(take_screenshot=True, take_system_audio=False, dont_save_image print("LLM INPUT (screenshot)", llm_input) - llm_output = assistant( + llm_output = agent( llm_input, - get_chat_message_history().messages, get_client(), screenshot_path=screenshot_path if take_screenshot else None, dont_save_image=dont_save_image, @@ -142,7 +147,7 @@ def process_audio(take_screenshot=True, take_system_audio=False, dont_save_image the_main_window.set_text_to_input_box(last_ai_response) the_main_window.complated_answer = True - signal_handler.assistant_response_ready.emit() + signal_handler.agent_response_ready.emit() def play_text(): from ..gpt_computer_agent import the_main_window @@ -156,7 +161,7 @@ def play_text(): the_main_window.read_part_task() if the_main_window.stop_talking: the_main_window.stop_talking = False - signal_handler.assistant_response_stopped.emit() + signal_handler.agent_response_stopped.emit() playback_thread = threading.Thread(target=play_text) playback_thread.start() @@ -165,9 +170,11 @@ def play_text(): traceback.print_exc() from ..gpt_computer_agent import the_input_box, the_main_window - the_main_window.update_from_thread("EXCEPTION: " + str(e)) + exception_str = traceback.format_exc() + + the_main_window.update_from_thread("EXCEPTION: " + str(exception_str)) tts_if_you_can("Exception occurred. 
Please check the logs.") - signal_handler.assistant_response_stopped.emit() + signal_handler.agent_response_stopped.emit() def process_screenshot(): @@ -177,7 +184,8 @@ def process_screenshot(): try: global last_ai_response from ..gpt_computer_agent import the_input_box, the_main_window - from ..audio.record import the_input_box_pre + + from ..audio.input_box import the_input_box_pre llm_input = "I just take a screenshot. for you to remember. Just say 'Ok.' if the user doesnt want anything before." @@ -195,12 +203,12 @@ def process_screenshot(): "Transciption Completed. Running AI..." ) - llm_output = assistant( + llm_output = agent( llm_input, - get_chat_message_history().messages, get_client(), screenshot_path=just_screenshot_path, dont_save_image=False, + just_screenshot=True, ) last_ai_response = llm_output.replace("", "") @@ -214,7 +222,7 @@ def process_screenshot(): the_main_window.set_text_to_input_box(last_ai_response) the_main_window.complated_answer = True - signal_handler.assistant_response_ready.emit() + signal_handler.agent_response_ready.emit() def play_text(): from ..gpt_computer_agent import the_main_window @@ -228,7 +236,7 @@ def play_text(): the_main_window.read_part_task() if the_main_window.stop_talking: the_main_window.stop_talking = False - signal_handler.assistant_response_stopped.emit() + signal_handler.agent_response_stopped.emit() playback_thread = threading.Thread(target=play_text) playback_thread.start() @@ -238,9 +246,11 @@ def play_text(): traceback.print_exc() from ..gpt_computer_agent import the_input_box, the_main_window - the_main_window.update_from_thread("EXCEPTION: " + str(e)) + exception_str = traceback.format_exc() + + the_main_window.update_from_thread("EXCEPTION: " + str(exception_str)) tts_if_you_can("Exception occurred. Please check the logs.") - signal_handler.assistant_response_stopped.emit() + signal_handler.agent_response_stopped.emit() def process_text(text, screenshot_path=None): @@ -252,9 +262,8 @@ def process_text(text, screenshot_path=None): llm_input = text - llm_output = assistant( + llm_output = agent( llm_input, - get_chat_message_history().messages, get_client(), screenshot_path=screenshot_path, dont_save_image=True, @@ -270,7 +279,7 @@ def process_text(text, screenshot_path=None): the_main_window.set_text_to_input_box(last_ai_response) the_main_window.complated_answer = True - signal_handler.assistant_response_ready.emit() + signal_handler.agent_response_ready.emit() def play_text(): from ..gpt_computer_agent import the_main_window @@ -284,7 +293,7 @@ def play_text(): the_main_window.read_part_task() if the_main_window.stop_talking: the_main_window.stop_talking = False - signal_handler.assistant_response_stopped.emit() + signal_handler.agent_response_stopped.emit() playback_thread = threading.Thread(target=play_text) playback_thread.start() @@ -294,6 +303,53 @@ def play_text(): traceback.print_exc() from ..gpt_computer_agent import the_main_window - the_main_window.update_from_thread("EXCEPTION: " + str(e)) + exception_str = traceback.format_exc() + + the_main_window.update_from_thread("EXCEPTION: " + str(exception_str)) tts_if_you_can("Exception occurred. Please check the logs.") - signal_handler.assistant_response_stopped.emit() + signal_handler.agent_response_stopped.emit() + + + + + + +import sentry_sdk +sentry_sdk.init( + dsn="https://eed76b3c8eb23bbe1c2f6a796a03f1a9@o4508336623583232.ingest.us.sentry.io/4508556319195136", + # Set traces_sample_rate to 1.0 to capture 100% + # of transactions for tracing. 
+ traces_sample_rate=1.0, + release=f"gcs@{get_version()}", + server_name="gca_client", +) + +def process_text_api(text, screenshot_path=None): + with my_tracer.start_span("process_text_api") as span: + span.set_attribute("user_id", user_id) + span.set_attribute("os_name", os_name_) + try: + global last_ai_response + + llm_input = text + + + sentry_sdk.set_user({"id": load_user_id()}) + + + sentry_sdk.profiler.start_profiler() + llm_output = agent( + llm_input, + get_client(), + screenshot_path=screenshot_path, + dont_save_image=True, + ) + sentry_sdk.profiler.stop_profiler() + + return llm_output + + + except Exception as e: + print("Error in process_text", e) + traceback.print_exc() + \ No newline at end of file diff --git a/gpt_computer_agent/api.py b/gpt_computer_agent/api.py index abd1897..3ac7947 100644 --- a/gpt_computer_agent/api.py +++ b/gpt_computer_agent/api.py @@ -3,9 +3,10 @@ from flask import Flask, request, jsonify import threading import time - +import pyautogui from werkzeug.serving import make_server +from waitress import serve app = Flask(__name__) @@ -14,6 +15,33 @@ def status(): return jsonify({"response": True}) + + +def the_input(text, screen, talk): + print("Input:", text) + + from .agent.process import process_text_api + from .utils.db import ( + screenshot_path, + ) + + + + if screen != "true": + result = process_text_api(text, None) + else: + screenshot = pyautogui.screenshot() + screenshot.save(screenshot_path) + result = process_text_api(text, screenshot_path) + + + + + + return jsonify({"response": result}) + + + @app.route("/input", methods=["POST"]) def input(): """ @@ -23,36 +51,31 @@ def input(): text = data["text"] screen = data["screen"] talk = data["talk"] - print("Input:", text) - from .gpt_computer_agent import the_main_window, the_input_box - firsst_text = the_input_box.toPlainText() + return the_input(text, screen, talk) - original_tts = the_main_window.tts_available - if talk == "true": - the_main_window.tts_available = True - the_main_window.manuel_stop = True - if screen != "true": - the_main_window.button_handler.input_text(text) - else: - the_main_window.button_handler.input_text_screenshot(text) - while the_input_box.toPlainText() == firsst_text: - time.sleep(0.3) - while the_input_box.toPlainText().startswith("System:"): - time.sleep(0.3) +@app.route("/request", methods=["POST"]) +def the_request(): + """ + This function receives input from the user and returns the response. 
+ """ + data = request.json + the_request = data["request"] + the_response = data["response"] + if "screen" in data: + screen = data["screen"] + else: + screen = "false" - while not the_main_window.state == "idle": - time.sleep(0.3) + combined = the_request + "\n" + the_response - response = the_input_box.toPlainText() + return the_input(combined, screen, "false") - the_main_window.tts_available = original_tts - return jsonify({"response": response}) @app.route("/screenshot", methods=["POST"]) @@ -113,9 +136,7 @@ def profile(): from .utils.db import set_profile set_profile(profile) - from .gpt_computer_agent import the_main_window - the_main_window.update_from_thread("Profile set to " + profile) return jsonify({"response": "Profile set to " + profile}) @@ -127,9 +148,7 @@ def reset_memory(): from .agent.chat_history import clear_chat_history clear_chat_history() - from .gpt_computer_agent import the_main_window - the_main_window.update_from_thread("Memory reset") return jsonify({"response": "Memory reset"}) @@ -141,9 +160,7 @@ def enable_predefined_agents(): from .utils.db import activate_predefined_agents_setting activate_predefined_agents_setting() - from .gpt_computer_agent import the_main_window - the_main_window.update_from_thread("Predefined agents enabled") return jsonify({"response": "Predefined agents enabled"}) @@ -155,9 +172,7 @@ def disable_predefined_agents(): from .utils.db import deactivate_predefined_agents_setting deactivate_predefined_agents_setting() - from .gpt_computer_agent import the_main_window - the_main_window.update_from_thread("Predefined agents disabled") return jsonify({"response": "Predefined agents disabled"}) @@ -169,9 +184,7 @@ def enable_online_tools(): from .utils.db import activate_online_tools_setting activate_online_tools_setting() - from .gpt_computer_agent import the_main_window - the_main_window.update_from_thread("Online tools enabled") return jsonify({"response": "Online tools enabled"}) @@ -183,9 +196,7 @@ def disable_online_tools(): from .utils.db import deactivate_online_tools_setting deactivate_online_tools_setting() - from .gpt_computer_agent import the_main_window - the_main_window.update_from_thread("Online tools disabled") return jsonify({"response": "Online tools disabled"}) @@ -409,6 +420,67 @@ def save_openai_api_key(): return jsonify({"response": "OpenAI API key saved."}) +@app.route("/save_user_id", methods=["POST"]) +def save_user_id(): + """ + This api saves the user id + """ + data = request.json + user_id = data["user_id"] + from .utils.db import change_user_id + + change_user_id(user_id) + return jsonify({"response": "User id changed."}) + + +@app.route("/save_aws_access_key_id", methods=["POST"]) +def save_aws_access_key_id(): + + data = request.json + aws_access_key_id = data["aws_access_key_id"] + from .utils.db import save_aws_access_key_id + + save_aws_access_key_id(aws_access_key_id) + return jsonify({"response": "aws_access_key_id key saved."}) + + +@app.route("/save_aws_secret_access_key", methods=["POST"]) +def save_aws_secret_access_key(): + + data = request.json + aws_secret_access_key = data["aws_secret_access_key"] + from .utils.db import save_aws_secret_access_key + + save_aws_secret_access_key(aws_secret_access_key) + return jsonify({"response": "aws_secret_access_key key saved."}) + + +@app.route("/save_system_prompt", methods=["POST"]) +def save_system_prompt(): + """ + This api saves the prompt + """ + data = request.json + prompt = data["prompt"] + from .utils.db import save_system_prompt + + 
save_system_prompt(prompt) + return jsonify({"response": "prompt saved."}) + +@app.route("/save_anthropic_api_key", methods=["POST"]) +def save_anthropic_api_key(): + """ + This api saves the + """ + data = request.json + anthropic_api_key = data["anthropic_api_key"] + from .utils.db import save_anthropic_api_key + + save_anthropic_api_key(anthropic_api_key) + return jsonify({"response": "Anthropic API key saved."}) + + + @app.route("/save_openai_url", methods=["POST"]) def save_openai_url(): """ @@ -422,6 +494,20 @@ def save_openai_url(): return jsonify({"response": "OpenAI base URL saved."}) + + +@app.route("/save_api_version", methods=["POST"]) +def save_api_version(): + """ + This api saves the OpenAI base URL + """ + data = request.json + api_version = data["api_version"] + from .utils.db import save_api_version + + save_api_version(api_version) + return jsonify({"response": "API version saved."}) + @app.route("/save_model_settings", methods=["POST"]) def save_model_settings(): """ @@ -602,6 +688,15 @@ def get_openai_models(): return jsonify({"response": get_openai_models()}) +@app.route("/get_azureai_models", methods=["POST"]) +def get_azureai_models(): + """ + This api returns the list of Azure AI models + """ + from .llm_settings import get_azureai_models + + return jsonify({"response": get_azureai_models()}) + @app.route("/get_ollama_models", methods=["POST"]) def get_ollama_models(): @@ -633,6 +728,76 @@ def get_groq_models(): return jsonify({"response": get_groq_models()}) + + + + + + + +@app.route("/mouse_scroll_down", methods=["POST"]) +def mouse_scroll_down(): + + data = request.json + amount = data["amount"] + + from .display_tools import mouse_scroll_ + mouse_scroll_("down", amount) + return jsonify({"response": f"Mouse scrolled down by {amount}"}) +@app.route("/mouse_scroll_up", methods=["POST"]) +def mouse_scroll_up(): + + data = request.json + amount = data["amount"] + + from .display_tools import mouse_scroll_ + mouse_scroll_("up", amount) + return jsonify({"response": f"Mouse scrolled up by {amount}"}) + + + + + + + + +@app.route("/add_mcp", methods=["POST"]) +def add_mcp(): + + data = request.json + name = data["name"] + command = data["command"] + args = data["args"] + from .mcp.tool import add_custom_mcp_server + + add_custom_mcp_server(name, command, args) + return jsonify({"response": "MCP added."}) + + + + +@app.route("/stop_server", methods=["POST"]) +def stop_server(): + + + try: + try: + from .gpt_computer_agent import the_main_window + the_main_window.close() + except ImportError: + from gpt_computer_agent import the_main_window + the_main_window.close() + except: + pass + + + + stop_api() + exit(0) + + + + class ServerThread(threading.Thread): def __init__(self, app, host, port): threading.Thread.__init__(self) @@ -652,15 +817,18 @@ def shutdown(self): server_thread = None -def start_api(): - global server_thread - if server_thread is None: - server_thread = ServerThread(app, "localhost", 7541) - server_thread.start() - print("API started") - else: - print("API is already running") +def start_api(api=False): + if api == False: + global server_thread + if server_thread is None: + server_thread = ServerThread(app, "0.0.0.0", 7541) + server_thread.start() + print("API started") + else: + print("API is already running") + else: + serve(app, host="0.0.0.0", port=7541) def stop_api(): global server_thread @@ -670,4 +838,4 @@ def stop_api(): server_thread = None print("API stopped") else: - print("API is not running") + print("API is not running") \ No newline at 
end of file diff --git a/gpt_computer_agent/audio/record.py b/gpt_computer_agent/audio/record.py index 81dfa4b..3b9c236 100644 --- a/gpt_computer_agent/audio/record.py +++ b/gpt_computer_agent/audio/record.py @@ -2,10 +2,12 @@ from ..gui.signal import * from ..utils.db import * from ..utils.telemetry import my_tracer, os_name + from ..gpt_computer_agent import the_input_box except ImportError: from gui.signal import * from utils.db import * from utils.telemetry import my_tracer, os_name + from gpt_computer_agent import the_input_box import numpy as np import sounddevice as sd import soundfile as sf @@ -24,7 +26,7 @@ user_id = load_user_id() os_name_ = os_name() -the_input_box_pre = "" + import queue @@ -156,4 +158,4 @@ def quick_speech_to_text(time_total: int = 5) -> str: except ImportError: from stt import speech_to_text - return speech_to_text(quic_location) + return speech_to_text(quic_location) \ No newline at end of file diff --git a/gpt_computer_agent/classes.py b/gpt_computer_agent/classes.py new file mode 100644 index 0000000..de63880 --- /dev/null +++ b/gpt_computer_agent/classes.py @@ -0,0 +1,334 @@ +import time +import traceback +import json +import re +import hashlib + +# Rich imports +from rich.console import Console +from rich.panel import Panel +from rich.style import Style + +from .remote import Remote_Client +from .version import get_version + + +import sentry_sdk +sentry_sdk.init( + dsn="https://eed76b3c8eb23bbe1c2f6a796a03f1a9@o4508336623583232.ingest.us.sentry.io/4508556319195136", + # Set traces_sample_rate to 1.0 to capture 100% + # of transactions for tracing. + traces_sample_rate=1.0, + release=f"gcs@{get_version()}", + server_name="gca_framework", +) +from .utils.user_id import load_user_id +sentry_sdk.set_user({"id": load_user_id()}) + + + + + + + + +# Create a global Console object for styled output +console = Console() + + +def extract_json(llm_output): + + # Use regex to extract the json then transform it to a python dict object ```json ```` + json_str = re.search(r'```json\n(.*?)```', llm_output, re.DOTALL).group(1) + + + # transform the json string to a python dict object + transformed_json = json.loads(json_str) + + + + return transformed_json + + + +def current_date_time(): + return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + + + + +class BaseClass: + def __init__(self, screen=False): + self.screen_task = screen + + def add_client(self, client: Remote_Client): + + self.client = client + + if hasattr(self, "verifier"): + if self.verifier: + self.verifier.screen_task = self.screen_task + self.verifier.add_client(client) + + def add_task(self, task): + self.task = task + + + + + def sha_hash(self, text): + + return hashlib.sha256(text.encode()).hexdigest() + + + +class BaseVerifier(BaseClass): + def __init__(self, try_count=5, exception_return=None, *args, **kwargs): + + super().__init__(*args, **kwargs) + self.try_count = try_count + self.exception_return = exception_return + + +class TypeVerifier(BaseVerifier): + def __init__(self, type, *args, **kwargs): + + super().__init__(*args, **kwargs) + self.type = type + self.feedback = None + + + def verify(self, description, result): + with sentry_sdk.start_transaction(op="task", name="Verify"): + console.print( + Panel( + "[bold yellow]Verifying result with TypeVerifier...[/bold yellow]", + title="Verifier", + style=Style(color="bright_white", bgcolor="black", bold=True) + ) + ) + + console.print(f"[bold]Expected type:[/bold] [green]{self.type}[/green]\n") + + the_ai_result_if_we_have = "" + if not 
result.startswith("No response"): + the_ai_result_if_we_have = f"AI Result:\n{result}\n" + + control_point_span = sentry_sdk.start_span(name="Control Point") + + the_request = f""" + User Request: + {description} + + {the_ai_result_if_we_have} + + + Now critically analyze the result of the task you just completed. + +Rules: +- If the result is not verified , no problem its enough to "I am satified" to continue to the next task. +- If the result is not true give an resolve cookbook as Feedback to resolve. + + + + Getting current state: + - See the screen (Optional, If the ai output is not enough) + - Read the history of conversation to understand the context of the task. + + + If the result not true, respond onyl with “I am sorry” and "Reason:" and "Feedback to resolve:" to trigger a retry. + + If the result is true, respond with "I am satisfied" to continue to the next task. + + Current Date Time: {current_date_time()} + """ + + control_point_span.set_data("request", the_request) + control_point_span.set_data("screen", self.screen_task) + control_point = self.client.request( + the_request + , "", screen=self.screen_task) + + control_point_span.finish() + + if "I am sorry" in control_point: + self.feedback = control_point + + raise Exception(f"Not satisfied with the result {self.task.description}") + + + + prompt = """ + Hi, now your responsibility is returning the answer in the requested format. + + User only wants the result in the format of """f"""{self.type}"""+""". + Dont use any other format or any other type of data. + + Format recipe: + 1. list + Return the user want like this: + ```json + ["element1", "element2", "element3"] + ``` + + 2. dict + Return the user want like this: + ```json + {"key1": "value1", "key2": "value2", "key3": "value3"} + ``` + + 3. list in list + Return the user want like this: + ```json + [["element1", "element2", "element3"], ["element4", "element5", "element6"]] + ``` + + 4. dict in list + Return the user want like this: + ```json + [{"key1": "value1", "key2": "value2", "key3": "value3"}, {"key4": "value4", "key5": "value5", "key6": "value6"}] + ``` + + 5. string + Return the user want like this: + ```json + "This is a string" + ``` + + 6. integer + Return the user want like this: + ```json + 123 + ``` + + 7. float + Return the user want like this: + ```json + 123.456 + ``` + + 8. bool + Return the user want like this: + ```json + true + ``` + + + + End of the day return result in ```json ``` format. 
+ """ + + + + self.client.change_profile(self.task.hash) + + + + + extracting_output_span = sentry_sdk.start_span(name="Extracting Output") + control_point_span.set_data("extracting_output_span", extracting_output_span) + result = self.client.request(prompt, "", screen=self.screen_task) + extracting_output_span.finish() + + + + + + result = extract_json(result) + return result + + + + + + +class Task(BaseClass): + def __init__(self, description, verifier: BaseVerifier = None, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.description = description + self.output = "" + + self.verifier = verifier + if self.verifier: + self.verifier.add_task(self) + + self.result = None + + self.hash = self.sha_hash(description) + + + def run(self): + with sentry_sdk.start_transaction(op="task", name="Run"): + console.print( + Panel( + f"[bold green]Starting Task[/bold green]\n[b]Task Description:[/b] {self.description}", + title="Task", + style=Style(color="bright_white", bgcolor="black", bold=True) + ) + ) + + + requesting_ai_span = sentry_sdk.start_span(name="Requesting AI") + + requesting_ai_span.set_data("verifier", self.verifier) + + # Verify the result + if self.verifier: + + try_count = 0 + + while try_count < self.verifier.try_count: + try_count += 1 + + console.print( + Panel( + f"[yellow]Attempt {try_count}[/yellow] ", + title="Retry", + style=Style(color="bright_white", bgcolor="black", bold=True) + ) + ) + + if try_count > 1: + self.output = "User is not satisfied with the result. Please try again." if self.verifier.feedback is None else self.verifier.feedback + self.client.change_profile(self.hash) + + result = self.client.request(self.description, self.output, screen=self.screen_task) + + time.sleep(1) + ai_result = result + try: + self.client.change_profile(self.hash+"VERIFY") + result = self.verifier.verify(self.description, result) + console.print("[bold green]Verification successful![/bold green]\n") + break + except Exception as e: + console.print( + Panel( + f"[red]Verification failed[/red]\nAI Output: {ai_result}\nFeedback: {self.verifier.feedback}", + title="Verification Error", + style=Style(color="bright_white", bgcolor="black", bold=True) + ) + ) + + result = self.verifier.exception_return + + + else: + result = self.client.request(self.description, self.output, screen=self.screen_task) + + + requesting_ai_span.finish() + + + + console.print( + Panel( + "[bold green]Task Completed[/bold green]\n[bold]Final result ready.[/bold]", + title="Task Finished", + style=Style(color="bright_white", bgcolor="black", bold=True) + ) + ) + + self.result = result + return result \ No newline at end of file diff --git a/gpt_computer_agent/cu/ask_anthropic.py b/gpt_computer_agent/cu/ask_anthropic.py new file mode 100644 index 0000000..02d189a --- /dev/null +++ b/gpt_computer_agent/cu/ask_anthropic.py @@ -0,0 +1,140 @@ +import random +import traceback + + +from langchain_core.messages import HumanMessage, SystemMessage, AIMessage + + + + +try: + + from ..agent import get_agent_executor + from ..screen.shot import * + from ..utils.db import load_model_settings, agents + from ..llm import get_model + from ..llm_settings import llm_settings + from ..utils.chat_history import ChatHistory + from .computer import screenshot_action +except ImportError: + + from agent import get_agent_executor + from screen.shot import * + from utils.db import load_model_settings, agents + from llm import get_model + from llm_settings import llm_settings + from utils.chat_history import ChatHistory + from 
computer import screenshot_action + + + + + + +def ask_anthropic( + the_request:str +): + + try: + + from ..agent import get_agent_executor + except ImportError: + from agent import get_agent_executor + + + try: + print("ASK ANTHROPIC", the_request) + + + + llm_input = the_request + + print("LLM INPUT", llm_input) + + + + + human_first_message = {"type": "text", "text": f"{llm_input}"} + + + + the_message = [ + human_first_message + ] + + + + human_second_message = None + + + + base64_image = screenshot_action(direct_base64=True) + + + + + + human_second_message = { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{base64_image}"}, + } + + + + print("LEN OF IMAGE", len(base64_image)) + + + if human_second_message: + the_message.append(human_second_message) + + + + + + the_message = HumanMessage(content=the_message) + + + + + + + + + + + msg = get_agent_executor(the_anthropic_model=True).invoke( + {"messages": [the_message]} + ) + + + + + + the_last_messages = msg["messages"] + + + + + + + + + + return_value = the_last_messages[-1].content + if isinstance(return_value, list): + the_text = "" + for each in return_value: + the_text += str(each) + return_value = the_text + + if return_value == "": + return_value = "No response " + + + + + + return return_value + + except Exception as e: + traceback.print_exc() \ No newline at end of file diff --git a/gpt_computer_agent/cu/base.py b/gpt_computer_agent/cu/base.py new file mode 100644 index 0000000..08c7d05 --- /dev/null +++ b/gpt_computer_agent/cu/base.py @@ -0,0 +1,69 @@ +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass, fields, replace +from typing import Any + +from anthropic.types.beta import BetaToolUnionParam + + +class BaseAnthropicTool(metaclass=ABCMeta): + """Abstract base class for Anthropic-defined tools.""" + + @abstractmethod + def __call__(self, **kwargs) -> Any: + """Executes the tool with the given arguments.""" + ... 
+ + @abstractmethod + def to_params( + self, + ) -> BetaToolUnionParam: + raise NotImplementedError + + +@dataclass(kw_only=True, frozen=True) +class ToolResult: + """Represents the result of a tool execution.""" + + output: str | None = None + error: str | None = None + base64_image: str | None = None + system: str | None = None + + def __bool__(self): + return any(getattr(self, field.name) for field in fields(self)) + + def __add__(self, other: "ToolResult"): + def combine_fields( + field: str | None, other_field: str | None, concatenate: bool = True + ): + if field and other_field: + if concatenate: + return field + other_field + raise ValueError("Cannot combine tool results") + return field or other_field + + return ToolResult( + output=combine_fields(self.output, other.output), + error=combine_fields(self.error, other.error), + base64_image=combine_fields(self.base64_image, other.base64_image, False), + system=combine_fields(self.system, other.system), + ) + + def replace(self, **kwargs): + """Returns a new ToolResult with the given fields replaced.""" + return replace(self, **kwargs) + + +class CLIResult(ToolResult): + """A ToolResult that can be rendered as a CLI output.""" + + +class ToolFailure(ToolResult): + """A ToolResult that represents a failure.""" + + +class ToolError(Exception): + """Raised when a tool encounters an error.""" + + def __init__(self, message): + self.message = message \ No newline at end of file diff --git a/gpt_computer_agent/cu/computer.py b/gpt_computer_agent/cu/computer.py new file mode 100644 index 0000000..680feb9 --- /dev/null +++ b/gpt_computer_agent/cu/computer.py @@ -0,0 +1,298 @@ +try: + from ..utils.db import * + from ..llm import get_model + from ..top_bar_wrapper import wrapper + from ..llm_settings import llm_settings +except ImportError: + from utils.db import * + from top_bar_wrapper import wrapper + from llm_settings import llm_settings + +from langchain.tools import tool + + +import base64 +import math +import os +import platform +import shlex +import shutil +import tempfile +import time +from strenum import StrEnum +from pathlib import Path +from typing import Literal, TypedDict +from uuid import uuid4 +import pyautogui +from anthropic.types.beta import BetaToolComputerUse20241022Param +from PIL import Image + +from .base import BaseAnthropicTool, ToolError, ToolResult +from .run import run + +OUTPUT_DIR = "/tmp/outputs" + +TYPING_DELAY_MS = 12 +TYPING_GROUP_SIZE = 50 + +Action = Literal[ + "key", + "type", + "mouse_move", + "left_click", + "left_click_drag", + "right_click", + "middle_click", + "double_click", + "screenshot", + "cursor_position", +] + + +class Resolution(TypedDict): + width: int + height: int + + +MAX_SCALING_TARGETS: dict[str, Resolution] = { + "XGA": Resolution(width=1024, height=768), + "WXGA": Resolution(width=1280, height=800), + "FWXGA": Resolution(width=1366, height=768), +} + + +class ScalingSource(StrEnum): + COMPUTER = "computer" + API = "api" + + +class ComputerToolOptions(TypedDict): + display_height_px: int + display_width_px: int + display_number: int | None + + +def chunks(s: str, chunk_size: int) -> list[str]: + return [s[i : i + chunk_size] for i in range(0, len(s), chunk_size)] + + +def smooth_move_to(x, y, duration=1.2): + + pyautogui.moveTo(x, y) + + +def key_action(text: str): + if platform.system() == "Darwin": + text = text.replace("super+", "command+") + + def normalize_key(key): + key = key.lower().replace("_", "") + key_map = { + "pagedown": "pgdn", + "pageup": "pgup", + "enter": "return", + 
"return": "enter", + } + return key_map.get(key, key) + + keys = [normalize_key(k) for k in text.split("+")] + + if len(keys) > 1: + if "darwin" in platform.system().lower(): + keystroke, modifier = (keys[-1], "+".join(keys[:-1])) + modifier = modifier.lower() + " down" + if keystroke.lower() == "space": + keystroke = " " + elif keystroke.lower() == "enter": + keystroke = "\n" + script = f""" + tell application "System Events" + keystroke "{keystroke}" using {modifier} + end tell + """ + os.system("osascript -e '{}'".format(script)) + else: + pyautogui.hotkey(*keys) + else: + pyautogui.press(keys[0]) + + +def type_action(text: str): + pyautogui.write(text, interval=TYPING_DELAY_MS / 1000) + + +def click_action(action: str): + time.sleep(0.1) + button = { + "left_click": "left", + "right_click": "right", + "middle_click": "middle", + } + if action == "double_click": + pyautogui.click() + time.sleep(0.1) + pyautogui.click() + else: + pyautogui.click(button=button.get(action, "left")) + + +def screenshot_action(direct_base64: bool = False) -> ToolResult: + """ + See the screenshot of the current screen. + """ + + + temp_dir = Path(tempfile.gettempdir()) + path = temp_dir / f"screenshot_{uuid4().hex}.png" + + screenshot = pyautogui.screenshot() + screenshot.save(str(path)) + + if _scaling_enabled: + x, y = scale_coordinates(ScalingSource.COMPUTER, width, height) + print(f"Scaling screenshot to {x}x{y}") + with Image.open(path) as img: + img = img.resize((x, y), Image.Resampling.LANCZOS) + img.save(path) + + if path.exists(): + + with open(path, "rb") as image_file: + base64_image = base64.b64encode(image_file.read()).decode("utf-8") + + path.unlink() + if direct_base64: + return base64_image + return ToolResult(base64_image=base64_image) + raise ToolError(f"Failed to take screenshot") + + +def screenshot_action_(path): + + + screenshot = pyautogui.screenshot() + screenshot.save(str(path)) + + if _scaling_enabled: + x, y = scale_coordinates(ScalingSource.COMPUTER, width, height) + print(f"Scaling screenshot to {x}x{y}") + with Image.open(path) as img: + img = img.resize((x, y), Image.Resampling.LANCZOS) + img.save(path) + + + + +def cursor_position_action(): + """ + Get the current position of the cursor as (x, y). + """ + + x, y = pyautogui.position() + x, y = scale_coordinates(ScalingSource.COMPUTER, x, y) + return ToolResult(output=f"X={x},Y={y}") + + +def shell_action(command: str, take_screenshot=True) -> ToolResult: + """ + Run a shell command and return the output. 
+ """ + _, stdout, stderr = run(command) + base64_image = None + + if take_screenshot: + time.sleep(_screenshot_delay) + base64_image = screenshot_action().base64_image + + return ToolResult(output=stdout, error=stderr, base64_image=base64_image) + + +def scale_coordinates(source: ScalingSource, x: int, y: int): + if not _scaling_enabled: + return x, y + ratio = width / height + target_dimension = None + for dimension in MAX_SCALING_TARGETS.values(): + if abs(dimension["width"] / dimension["height"] - ratio) < 0.02: + if dimension["width"] < width: + target_dimension = dimension + break + if target_dimension is None: + return x, y + x_scaling_factor = target_dimension["width"] / width + y_scaling_factor = target_dimension["height"] / height + if source == ScalingSource.API: + if x > width or y > height: + raise ToolError(f"Coordinates {x}, {y} are out of bounds") + return round(x / x_scaling_factor), round(y / y_scaling_factor) + return round(x * x_scaling_factor), round(y * y_scaling_factor) + + +@wrapper +def mouse_move_action(coordinate: tuple[int, int]): + """Move the mouse to the specified coordinate.""" + if coordinate is None: + raise ToolError("coordinate is required for mouse_move") + x, y = scale_coordinates(ScalingSource.API, coordinate[0], coordinate[1]) + smooth_move_to(x, y) + +@wrapper +def left_click_drag_action(coordinate: tuple[int, int]): + """Perform a left click and drag to the specified coordinate.""" + if coordinate is None: + raise ToolError("coordinate is required for left_click_drag") + x, y = scale_coordinates(ScalingSource.API, coordinate[0], coordinate[1]) + smooth_move_to(x, y) + pyautogui.dragTo(x, y, button="left") + +@wrapper +def key_action_handler(text: str): + """Press a specific key.""" + if text is None: + raise ToolError("text is required for key") + key_action(text) + +@wrapper +def type_action_handler(text: str): + """Type the specified text.""" + if text is None: + raise ToolError("text is required for type") + type_action(text) + +@wrapper +def left_click_action(): + """Perform a left click.""" + click_action("left_click") + +@wrapper +def right_click_action(): + """Perform a right click.""" + click_action("right_click") + +@wrapper +def middle_click_action(): + """Perform a middle click.""" + click_action("middle_click") + +@wrapper +def double_click_action(): + """Perform a double click.""" + click_action("double_click") + + + + + + + + +# Initialize global variables +width, height = pyautogui.size() + +display_num = None +_screenshot_delay = 2.0 +_scaling_enabled = True + + + +computer_tool = [tool(mouse_move_action), tool(left_click_drag_action), tool(key_action_handler), tool(type_action_handler), tool(left_click_action), tool(right_click_action), tool(middle_click_action), tool(double_click_action), tool(screenshot_action), tool(cursor_position_action), tool(shell_action)] \ No newline at end of file diff --git a/gpt_computer_agent/cu/run.py b/gpt_computer_agent/cu/run.py new file mode 100644 index 0000000..0bf447b --- /dev/null +++ b/gpt_computer_agent/cu/run.py @@ -0,0 +1,42 @@ +"""Utility to run shell commands asynchronously with a timeout.""" + +import asyncio + +TRUNCATED_MESSAGE: str = "To save on context only part of this file has been shown to you. You should retry this tool after you have searched inside the file with `grep -n` in order to find the line numbers of what you are looking for." 
+MAX_RESPONSE_LEN: int = 16000 + + +def maybe_truncate(content: str, truncate_after: int | None = MAX_RESPONSE_LEN): + """Truncate content and append a notice if content exceeds the specified length.""" + return ( + content + if not truncate_after or len(content) <= truncate_after + else content[:truncate_after] + TRUNCATED_MESSAGE + ) + + +async def run( + cmd: str, + timeout: float | None = 120.0, # seconds + truncate_after: int | None = MAX_RESPONSE_LEN, +): + """Run a shell command asynchronously with a timeout.""" + process = await asyncio.create_subprocess_shell( + cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + + try: + stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout) + return ( + process.returncode or 0, + maybe_truncate(stdout.decode(), truncate_after=truncate_after), + maybe_truncate(stderr.decode(), truncate_after=truncate_after), + ) + except asyncio.TimeoutError as exc: + try: + process.kill() + except ProcessLookupError: + pass + raise TimeoutError( + f"Command '{cmd}' timed out after {timeout} seconds" + ) from exc \ No newline at end of file diff --git a/gpt_computer_agent/display_tools.py b/gpt_computer_agent/display_tools.py index 3d302cb..c37e8b5 100644 --- a/gpt_computer_agent/display_tools.py +++ b/gpt_computer_agent/display_tools.py @@ -1,242 +1,267 @@ +import json +import re from langchain.tools import tool import traceback +import pyautogui +import time + try: - from .utils.db import load_api_key + from .utils.db import * from .llm import get_model from .top_bar_wrapper import wrapper + from .llm_settings import llm_settings + except ImportError: - from utils.db import load_api_key + from utils.db import * from top_bar_wrapper import wrapper + from llm_settings import llm_settings + + + + + + -@wrapper -def click_on_a_text_on_the_screen_(text: str, click_type: str = "singular") -> bool: + + + + +def mouse_scroll_(direction: str, amount: int = 1) -> bool: """ - A function to click on a text on the screen. + A function to scroll the mouse wheel. Parameters: - - text (str): The text to be clicked on. - - click_type (str): The type of click to be performed. The default value is "singular". Possible values are "singular" and "double". + - direction (str): The direction of the scroll. Possible values are "up" and "down". + - amount (int): The amount of scrolling to be performed. The default value is 1. Returns: - - bool: True if the text was clicked on successfully, False otherwise. + - bool: True if the scrolling was performed successfully, False otherwise. """ try: import pyautogui pyautogui.FAILSAFE = False - from interpreter import OpenInterpreter + if direction == "up": + pyautogui.scroll(amount) + elif direction == "down": + pyautogui.scroll(-amount) + return True + except: + traceback.print_exc() + return False - interpreter = OpenInterpreter() - interpreter.llm.api_key = load_api_key() +mouse_scroll = tool(mouse_scroll_) - screenshot = pyautogui.screenshot() - text_locations = interpreter.computer.display.find_text( - text, screenshot=screenshot - ) - print(text_locations) +def extract_code_from_result(llm_output): + """ + Extract the Python code from the LLM output. 
+ """ + code_match = re.search(r'```json\n(.*?)```', llm_output, re.DOTALL) + if code_match: + return code_match.group(1).strip() + return llm_output.strip() - x, y = text_locations[0]["coordinates"] - x *= interpreter.computer.display.width - y *= interpreter.computer.display.height - x = int(x) - y = int(y) - if click_type == "singular": - interpreter.computer.mouse.click(x=x, y=y, screenshot=screenshot) - elif click_type == "double": - interpreter.computer.mouse.double_click(x=x, y=y, screenshot=screenshot) - return True - except: - traceback.print_exc() - return False +def click_to_text_(text:str, double_click=False) -> bool: + """ + Click on the text + + """ + + try: + from .cu.ask_anthropic import ask_anthropic + from .cu.computer import click_action, mouse_move_action + except ImportError: + from cu.ask_anthropic import ask_anthropic + from cu.computer import click_action, mouse_move_action + + print("click_to_text") + print("text", text) + x_y = ask_anthropic(f"dont use tools, give me exactly location of '{text}' text as json x,y like"+ """{'x': 0, 'y': 0}"""+". Only return the json with ```json ```") + print("result", x_y) + + x_y = extract_code_from_result(x_y) + + x_y = json.loads(x_y) -click_on_a_text_on_the_screen = tool(click_on_a_text_on_the_screen_) + pyautogui.click(x_y['x'], x_y['y'], button='left') + if double_click: + time.sleep(0.1) + pyautogui.click(x_y['x'], x_y['y'], button='left') + return True -@wrapper -def move_on_a_text_on_the_screen_(text: str) -> bool: + +click_to_text = tool(click_to_text_) + + + +def click_to_icon_(icon:str, double_click=False) -> bool: + """ + Click on the icon + """ - A function to move on a text on the screen. - Parameters: - - text (str): The text to be moved on. + try: + from .cu.ask_anthropic import ask_anthropic + from .cu.computer import click_action, mouse_move_action + except ImportError: + from cu.ask_anthropic import ask_anthropic + from cu.computer import click_action, mouse_move_action - Returns: - - bool: True if the text was moved on successfully, False otherwise. + print("click_to_icon") + print("icon", icon) + x_y = ask_anthropic(f"dont use tools, give me exactly location of '{icon}' icon as json x,y like"+ """{'x': 0, 'y': 0}"""+". Only return the json with ```json ```") + print("result", x_y) + + x_y = extract_code_from_result(x_y) + + x_y = json.loads(x_y) + + pyautogui.click(x_y['x'], x_y['y'], button='left') + if double_click: + time.sleep(0.1) + pyautogui.click(x_y['x'], x_y['y'], button='left') + + + return True + + +click_to_icon = tool(click_to_icon_) + + +def click_to_area_( + area:str, double_click=False +) -> bool: """ + Click on the area like search bar + """ + try: - import pyautogui + from .cu.ask_anthropic import ask_anthropic + from .cu.computer import click_action, mouse_move_action + except ImportError: + from cu.ask_anthropic import ask_anthropic + from cu.computer import click_action, mouse_move_action - pyautogui.FAILSAFE = False + print("click_to_area") + print("area", area) + x_y = ask_anthropic(f"dont use tools, give me exactly location of '{area}' area as json x,y like"+ """{'x': 0, 'y': 0}"""+". 
Only return the json with ```json ```") + print("result", x_y) - from interpreter import OpenInterpreter + x_y = extract_code_from_result(x_y) - interpreter = OpenInterpreter() + x_y = json.loads(x_y) - interpreter.llm.api_key = load_api_key() + pyautogui.click(x_y['x'], x_y['y'], button='left') + if double_click: + time.sleep(0.1) + pyautogui.click(x_y['x'], x_y['y'], button='left') - screenshot = pyautogui.screenshot() - text_locations = interpreter.computer.display.find_text( - text, screenshot=screenshot - ) + return True - print(text_locations) - x, y = text_locations[0]["coordinates"] - x *= interpreter.computer.display.width - y *= interpreter.computer.display.height - x = int(x) - y = int(y) - interpreter.computer.mouse.move(x=x, y=y, screenshot=screenshot) - return True - except: - traceback.print_exc() - return False +click_to_area = tool(click_to_area_) -move_on_a_text_on_the_screen = tool(move_on_a_text_on_the_screen_) -@wrapper -def click_on_a_icon_on_the_screen_( - icon_name: str, click_type: str = "singular" -) -> bool: +def screenshot_(checking:str): + """ + Returns the current screenshot. Explain what should we check on the screenshot. """ - A function to click on a icon name on the screen. - Parameters: - - icon_name (str): The icon name to be clicked on. - - click_type (str): The type of click to be performed. The default value is "singular". Possible values are "singular" and "double". + from langchain_core.messages import HumanMessage, SystemMessage, AIMessage - Returns: - - bool: True if the icon name was clicked on successfully, False otherwise. - """ try: - import pyautogui + from .cu.computer import screenshot_action + from .agent.agent import get_agent_executor + except ImportError: + from cu.computer import screenshot_action + from agent.agent import get_agent_executor - pyautogui.FAILSAFE = False + the_base64 = screenshot_action(direct_base64=True) - from interpreter import OpenInterpreter - screenshot = pyautogui.screenshot() - interpreter = OpenInterpreter() - interpreter.llm.api_key = load_api_key() - if click_type == "singular": - interpreter.computer.mouse.click(icon=icon_name, screenshot=screenshot) - elif click_type == "double": - interpreter.computer.mouse.double_click( - icon=icon_name, screenshot=screenshot - ) - return True - except: - traceback.print_exc() - return False + human_first_message = {"type": "text", "text": f"Explain the image and check '{checking}' on the image."} -click_on_a_icon_on_the_screen = tool(click_on_a_icon_on_the_screen_) + the_message = [ + human_first_message + ] -@wrapper -def move_on_a_icon_on_the_screen_( - icon_name: str, -) -> bool: - """ - A function to move on a icon name on the screen. - Parameters: - - icon_name (str): The icon name to be move on. - Returns: - - bool: True if the icon name was moved on successfully, False otherwise. 
- """ - try: - import pyautogui + human_second_message = { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{the_base64}"}, + } - pyautogui.FAILSAFE = False - from interpreter import OpenInterpreter - screenshot = pyautogui.screenshot() - interpreter = OpenInterpreter() + the_message.append(human_second_message) - interpreter.llm.api_key = load_api_key() - interpreter.computer.mouse.move(icon=icon_name, screenshot=screenshot) - return True - except: - traceback.print_exc() - return False -move_on_a_icon_on_the_screen = tool(move_on_a_icon_on_the_screen_) + the_message = HumanMessage(content=the_message) -def mouse_scroll_(direction: str, amount: int = 1) -> bool: - """ - A function to scroll the mouse wheel. - Parameters: - - direction (str): The direction of the scroll. Possible values are "up" and "down". - - amount (int): The amount of scrolling to be performed. The default value is 1. - Returns: - - bool: True if the scrolling was performed successfully, False otherwise. - """ - try: - import pyautogui - pyautogui.FAILSAFE = False - if direction == "up": - pyautogui.scroll(amount) - elif direction == "down": - pyautogui.scroll(-amount) - return True - except: - traceback.print_exc() - return False + msg = get_agent_executor(no_tools=True).invoke( + {"messages": [the_message]} + ) -mouse_scroll = tool(mouse_scroll_) -@wrapper -def get_texts_on_the_screen_() -> str: - """ - It returns the texts on the screen. - """ - try: - pass - except: - pass + the_last_messages = msg["messages"] + + + + + + + + + + return_value = the_last_messages[-1].content + if isinstance(return_value, list): + the_text = "" + for each in return_value: + the_text += str(each) + return_value = the_text + + if return_value == "": + return_value = "No response " + - import pyautogui - the_screenshot_path = "temp_screenshot.png" - the_screenshot = pyautogui.screenshot() - the_screenshot.save(the_screenshot_path) - from interpreter.core.computer.utils.computer_vision import pytesseract_get_text + return return_value - return pytesseract_get_text(the_screenshot_path) -get_texts_on_the_screen = tool(get_texts_on_the_screen_) +screenshot = tool(screenshot_) \ No newline at end of file diff --git a/gpt_computer_agent/gpt_computer_agent.py b/gpt_computer_agent/gpt_computer_agent.py index 76d1a6a..14cb050 100644 --- a/gpt_computer_agent/gpt_computer_agent.py +++ b/gpt_computer_agent/gpt_computer_agent.py @@ -1,10 +1,10 @@ try: - from .agent.chat_history import * - from .agent.assistant import * + from .assistant.chat_history import * + from .assistant.agent import * from .llm import * from .llm_settings import llm_settings - from .agent.agent import * - from .agent.background import * + from .assistant.assistant import * + from .assistant.background import * from .gui.signal import * from .gui.button import * @@ -20,12 +20,12 @@ except ImportError: # This is for running the script directly # in order to test the GUI without rebuilding the package - from agent.chat_history import * - from agent.assistant import * + from assistant.chat_history import * + from assistant.agent import * from llm import * from llm_settings import llm_settings - from agent.agent import * - from agent.background import * + from assistant.assistant import * + from assistant.background import * from utils.db import * from gui.signal import * from gui.button import * @@ -71,7 +71,7 @@ try: import ctypes - myappid = "khulnasoft.gpt_computer_agent.gui.1" + myappid = "onuratakan.gpt_computer_agent.gui.1" 
ctypes.windll.shell32.SetCurrentProcessExplicitAppUserModelID(myappid) except: pass @@ -295,11 +295,14 @@ def split_with_multiple_delimiters(text, delimiters): def click_sound(): - pygame.mixer.init() + try: + pygame.mixer.init() - retro = pygame.mixer.Sound(click_sound_path) - retro.set_volume(0.1) - retro.play() + retro = pygame.mixer.Sound(click_sound_path) + retro.set_volume(0.1) + retro.play() + except: + pass class Worker(QThread): @@ -338,13 +341,20 @@ def __init__(self, parent=None): super(CustomTextEdit, self).__init__(parent) def keyPressEvent(self, event): - if event.key() == Qt.Key_Return or event.key() == Qt.Key_Enter: + if (event.key() == Qt.Key_Return or event.key() == Qt.Key_Enter) and not (event.modifiers() & Qt.ShiftModifier): global return_key_event return_key_event() super(CustomTextEdit, self).keyPressEvent( event ) # Process other key events normally + def insertFromMimeData(self, source): + newData = QtCore.QMimeData() + for format in source.formats(): + if format == 'text/plain': + newData.setData(format, source.data(format)) + super().insertFromMimeData(newData) + class Worker_2(QThread): text_to_set = pyqtSignal(str) @@ -1005,7 +1015,7 @@ def wake_word_trigger(self): self.wake_word_thread.start() def wake_word(self): - from .agent.process import tts_if_you_can + from .assistant.process import tts_if_you_can while True and is_wake_word_active() and self.wake_word_active: if wake_word(self): @@ -1174,8 +1184,9 @@ def stop_app(): input_box.setFixedHeight(80) - # Set text wrapping. I dont wat to cut the text - input_box.setWordWrapMode(QtGui.QTextOption.NoWrap) + + # If its used for a chat, you can use the following line to disable word wrap + #input_box.setWordWrapMode(QtGui.QTextOption.NoWrap) # Change the font size font = QtGui.QFont() @@ -1192,7 +1203,7 @@ def stop_app(): input_box.setPlaceholderText("Type here \nsand ↵ ") else: input_box.setPlaceholderText( - "Type here \nand ↵ \nor ⌘ + ↵ (+screenshot)" + "Type here \nand ↵ \nor ⌘ + ↵ (+screenshot) \n\nNew line: shift + ↵" ) else: if llm_settings[load_model_settings()]["vision"] is False: @@ -1436,12 +1447,12 @@ def add_screenshot_button(self): self.update() def update_state(self, new_state): - assistant_stopped = False + agent_stopped = False if self.state == "aitalking" and new_state == "idle": - assistant_stopped = True + agent_stopped = True if self.manuel_stop: - assistant_stopped = False + agent_stopped = False self.manuel_stop = False self.state = new_state @@ -1470,7 +1481,7 @@ def update_state(self, new_state): self.pulse_timer = None self.update() # Trigger a repaint - if assistant_stopped: + if agent_stopped: global the_input_box if ( the_input_box.toPlainText().endswith("?") @@ -1596,4 +1607,4 @@ def put_window_to_right_side_of_screen(self): # Add a small offset to the right side x -= 10 - self.move(x, y) + self.move(x, y) \ No newline at end of file diff --git a/gpt_computer_agent/gui/llmsettings.py b/gpt_computer_agent/gui/llmsettings.py index 0bf09d9..d8855b0 100644 --- a/gpt_computer_agent/gui/llmsettings.py +++ b/gpt_computer_agent/gui/llmsettings.py @@ -65,6 +65,30 @@ def save_openai_url_(): openai_url_save_button.clicked.connect(save_openai_url_) settings_dialog.layout().addWidget(openai_url_save_button) + + + + api_version_label = QLabel("API Version") + settings_dialog.layout().addWidget(api_version_label) + api_version_input = QLineEdit() + api_version = load_api_version() + api_version_input.setText(api_version) + settings_dialog.layout().addWidget(api_version_input) + + def 
save_api_version_(): + api_version = api_version_input.text() + save_api_version(api_version) + the_main_window.update_from_thread("Saved API Version") + the_main_window.input_box.setPlaceholderText("Type here") + settings_dialog.close() + + api_version_save_button = QPushButton("Save URL") + api_version_save_button.clicked.connect(save_api_version_) + settings_dialog.layout().addWidget(api_version_save_button) + + + + groq_api_key_label = QLabel("Groq API Key") settings_dialog.layout().addWidget(groq_api_key_label) groq_api_key_input = QLineEdit() @@ -106,10 +130,21 @@ def google_save_api_key_(api_key): def hide_openai(): api_key_label.hide() api_key_input.hide() + + save_button.hide() + + + def hide_azureai(): + api_key_label.hide() + api_key_input.hide() + save_button.hide() openai_url_label.hide() openai_url_input.hide() - save_button.hide() openai_url_save_button.hide() + api_version_label.hide() + api_version_input.hide() + api_version_save_button.hide() + def hide_groq(): groq_api_key_label.hide() @@ -124,10 +159,18 @@ def hide_google(): def show_openai(): api_key_label.show() api_key_input.show() + save_button.show() + + def show_azureai(): + api_key_label.show() + api_key_input.show() + save_button.show() openai_url_label.show() openai_url_input.show() - save_button.show() openai_url_save_button.show() + api_version_label.show() + api_version_input.show() + api_version_save_button.show() def show_groq(): groq_api_key_label.show() @@ -140,6 +183,7 @@ def show_google(): google_save_button.show() hide_openai() + hide_azureai() hide_groq() hide_google() @@ -156,6 +200,11 @@ def show_google(): if llm_settings[llm_show_name[model_select.currentText()]]["provider"] == "openai": show_openai() + + if llm_settings[llm_show_name[model_select.currentText()]]["provider"] == "azureai": + show_azureai() + + if llm_settings[llm_show_name[model_select.currentText()]]["provider"] == "groq": show_groq() if llm_settings[llm_show_name[model_select.currentText()]]["provider"] == "google": @@ -166,6 +215,7 @@ def show_google(): def on_model_change(): hide_openai() + hide_azureai() hide_groq() hide_google() the_save_string = llm_show_name[model_select.currentText()] @@ -174,11 +224,30 @@ def on_model_change(): if ( llm_settings[llm_show_name[model_select.currentText()]]["provider"] == "openai" + ): + show_openai() + + if ( + llm_settings[llm_show_name[model_select.currentText()]]["provider"] + == "anthropic" + ): + show_openai() + + if ( + llm_settings[llm_show_name[model_select.currentText()]]["provider"] + == "azureai" + ): + show_azureai() + + if ( + llm_settings[llm_show_name[model_select.currentText()]]["provider"] + == "azureopenai" ): show_openai() openai_url_label.show() openai_url_input.show() openai_url_save_button.show() + if llm_settings[llm_show_name[model_select.currentText()]]["vision"]: the_main_window.add_screenshot_button() else: @@ -265,7 +334,7 @@ def on_stt_model_change(): settings_dialog.layout().addWidget(separator) # Add an powered by label - powered_by_label = QLabel("Powered by KhulnaSoft <3") + powered_by_label = QLabel("Powered by Upsonic <3") # Make label bold font = powered_by_label.font() font.setBold(True) @@ -273,4 +342,4 @@ def on_stt_model_change(): settings_dialog.layout().addWidget(powered_by_label) - settings_dialog.exec_() + settings_dialog.exec_() \ No newline at end of file diff --git a/gpt_computer_agent/llm.py b/gpt_computer_agent/llm.py index 5c94dec..f89c99b 100644 --- a/gpt_computer_agent/llm.py +++ b/gpt_computer_agent/llm.py @@ -1,27 +1,18 @@ from openai 
import OpenAI -from langchain_openai import ChatOpenAI +from langchain_openai import ChatOpenAI, AzureChatOpenAI from langchain_community.chat_models import ChatOllama from langchain_google_genai import ChatGoogleGenerativeAI from langchain_groq import ChatGroq +from langchain_anthropic import ChatAnthropic +from langchain_aws import ChatBedrock + try: - from .utils.db import ( - load_api_key, - load_openai_url, - load_model_settings, - load_groq_api_key, - load_google_api_key, - ) + from .utils.db import * from .custom_callback import customcallback from .llm_settings import llm_settings except ImportError: - from utils.db import ( - load_api_key, - load_openai_url, - load_model_settings, - load_groq_api_key, - load_google_api_key, - ) + from utils.db import * from custom_callback import customcallback from llm_settings import llm_settings @@ -29,12 +20,14 @@ the_callback = customcallback(strip_tokens=False, answer_prefix_tokens=["Answer"]) -def get_model(high_context=False): - the_model = load_model_settings() +def get_model(high_context=False, the_model=None): + the_model = load_model_settings() if not the_model else the_model the_api_key = load_api_key() + the_anthropic_api_key = load_anthropic_api_key() the_groq_api_key = load_groq_api_key() the_google_api_key = load_google_api_key() the_openai_url = load_openai_url() + the_api_version = load_api_version() def open_ai_base(high_context): if the_openai_url == "default": @@ -52,14 +45,35 @@ def open_ai_base(high_context): return { "model": the_model, "api_key": the_api_key, - "max_retries": 15, + "max_retries": 35, "streaming": True, "callbacks": [the_callback], - "base_url": the_openai_url, } args_mapping = { ChatOpenAI: open_ai_base(high_context=high_context), + ChatAnthropic: { + "model": the_model, + "api_key": the_anthropic_api_key, + "max_retries": 35, + "streaming": False, + "callbacks": [the_callback], + }, + ChatBedrock: { + "model_id": the_model, + "aws_access_key_id": load_aws_access_key_id(), + "aws_secret_access_key": load_aws_secret_access_key(), + "region_name":"us-east-1", + "streaming": False, + "callbacks": [the_callback], + }, + AzureChatOpenAI: { + "azure_deployment": the_model.replace("-azureopenai", ""), + "api_version": the_api_version, + "max_retries": 35, + "streaming": True, + "callbacks": [the_callback], + }, ChatOllama: {"model": the_model}, ChatGroq: { "temperature": 0, @@ -78,6 +92,15 @@ def open_ai_base(high_context): the_tuple = None if model_args["provider"] == "openai": the_tuple = (ChatOpenAI, args_mapping[ChatOpenAI]) + elif model_args["provider"] == "anthropic": + the_tuple = (ChatAnthropic, args_mapping[ChatAnthropic]) + elif model_args["provider"] == "aws": + the_tuple = (ChatBedrock, args_mapping[ChatBedrock]) + elif model_args["provider"] == "azureai": + import os + os.environ["AZURE_OPENAI_API_KEY"] = the_api_key + os.environ["AZURE_OPENAI_ENDPOINT"] = the_openai_url + the_tuple = (AzureChatOpenAI, args_mapping[AzureChatOpenAI]) elif model_args["provider"] == "ollama": the_tuple = ( ChatOpenAI, @@ -101,4 +124,4 @@ def open_ai_base(high_context): def get_client(): the_api_key = load_api_key() - return OpenAI(api_key=the_api_key) + return OpenAI(api_key=the_api_key) \ No newline at end of file diff --git a/gpt_computer_agent/llm_settings.py b/gpt_computer_agent/llm_settings.py index 24747d8..9f94d4a 100644 --- a/gpt_computer_agent/llm_settings.py +++ b/gpt_computer_agent/llm_settings.py @@ -1,3 +1,9 @@ +try: + from .utils.db import * + +except ImportError: + from utils.db import * + llm_settings = 
{ "gpt-4o": { "show_name": "gpt-4o (OpenAI)", @@ -6,6 +12,27 @@ "tools": True, "stream": True, }, + "claude-3-5-sonnet-20241022": { + "show_name": "claude-3-5-sonnet-20241022 (Anthropic)", + "vision": True, + "provider": "anthropic", + "tools": True, + "stream": False, + }, + "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "show_name": "claude-3-5-sonnet-20241022 (AWS)", + "vision": True, + "provider": "aws", + "tools": True, + "stream": False, + }, + "gpt-4o-azureopenai": { + "show_name": "gpt-4o (AzureAI)", + "vision": True, + "provider": "azureai", + "tools": True, + "stream": True, + }, "gpt-4o-mini": { "show_name": "gpt-4o-mini (OpenAI)", "vision": True, @@ -103,6 +130,9 @@ def get_openai_models(): return [k for k, v in llm_settings.items() if v["provider"] == "openai"] +def get_azureai_models(): + return [k for k, v in llm_settings.items() if v["provider"] == "azureai"] + def get_ollama_models(): return [k for k, v in llm_settings.items() if v["provider"] == "ollama"] @@ -125,43 +155,12 @@ def get_groq_models(): def first_message(): from .character import name, developer, get_website_content + from .cu.computer import width, height, display_num + from .utils.db import load_system_prompt + model = load_model_settings() the_text = f""" -You are {name()} that developed by {developer()}, you are the first live AI assistant in everyone computer that can complete any task by using tools. - -Before any task, write a plan for your tasks and do it step by step. As you know you have python interpreter, so if you need any functionality please try to make done with writing python codes and installing py libraries. - -Don't forget, you are capable to make any task. - -Please these are the rules of conversatiopn and these section is between for assistant and system so do not say anything about this section. - -# Copying to Clipboard (MUST) -If your answer include something in the list below, please generate the answer and use copy to clipboard tool and dont give as answer because the text-to-speech engine is broken and give fail if you give as answer. - -- List of Somethings -- Detailed Explanation of Something -- Link(s) to a Website -- Code Snippet(s) -- Any Code Part -- Any too Long Text - -After copying the thing that requested please say: "I copied to clipboard" and stop. - - -# Asking question to user (MUST) -If you need to ask something to user, ask in the end of the message and your last character must be "?". - -# Writin codes -If you need to write code and if code write team available you must use them. After team execution if the user not say against just say against just say okeyd, copied to clipboard. - -# Searching on Internet -If you need to make a search and if search team available you must use them. - - -Your GitHub Repository: -https://github.com/KhulnaSoft/gpt-computer-agent - - +{load_system_prompt()} """ the_website_content = get_website_content() @@ -173,13 +172,4 @@ def first_message(): """ - return the_text - - -each_message_extension = """ - -# Usings Answer -Please start with in your last responses. 
DONT FORGET IT AND DONT TALK ABOUT THIS RULE OR REFFERENCE - - -""" + return the_text \ No newline at end of file diff --git a/gpt_computer_agent/mcp/tool.py b/gpt_computer_agent/mcp/tool.py new file mode 100644 index 0000000..e7d0809 --- /dev/null +++ b/gpt_computer_agent/mcp/tool.py @@ -0,0 +1,303 @@ +import asyncio +import pathlib +import time +from typing import List, Any, Dict + +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client +from langchain_mcp import MCPToolkit +from langchain_core.tools import BaseTool + + + + +from typing import Any, Dict, List +from langchain_core.tools import BaseTool +from pydantic import Field, PrivateAttr + + + + + +class MCPToolWrapper(BaseTool): + """A wrapper for an individual tool managed by the SyncInvocationManager.""" + _manager: Any = PrivateAttr() + _tool: Any = PrivateAttr() + + def __init__(self, tool: BaseTool, manager: "SyncInvocationManager"): + super().__init__(name=tool.name, description=tool.description) + self.name = tool.name + self.description = tool.description + self._manager = manager + self._tool = tool + + def _run(self, **kwargs: Any) -> Any: + """Run the tool synchronously using the SyncInvocationManager.""" + try: + print(f"Running tool: {self.name} with args: {kwargs}") + result = self._manager.invoke_tool_sync(self._tool, kwargs) + if result is None: + print(f"Tool {self.name} returned no result.") + else: + print(f"Tool {self.name} result: {result}") + return result + except Exception as e: + print(f"Error while running tool {self.name}: {e}") + return None + + async def _arun(self, **kwargs: Any) -> Any: + """Asynchronous run (if needed), wraps the synchronous call.""" + return self._run(**kwargs) + + +class MCPToolManager: + """Manages tools provided by the SyncInvocationManager and converts them into LangChain tools.""" + + def __init__(self, manager: "SyncInvocationManager"): + self.manager = manager + self.tools: List[BaseTool] = [] + + def load_tools(self) -> List[BaseTool]: + """Load tools from SyncInvocationManager and wrap them in LangChain-compatible structure.""" + raw_tools = self.manager.get_tools_sync() + + self.tools = [MCPToolWrapper(tool, self.manager) for tool in raw_tools] + return self.tools + + +class SyncInvocationManager: + def __init__(self, command: str, args: list[str], env: dict[str, str] | None = None): + self.loop = asyncio.new_event_loop() + self.server_params = StdioServerParameters( + command=command, + args=args, + env=env, + + ) + self.client_ctx = None + self.client = None + self.session_ctx = None + self.session = None + self.toolkit = None + self._task = None # Add this line + + + async def _start_async(self): + # Manually enter the stdio_client context + self.client_ctx = stdio_client(self.server_params) + self.client = await self.client_ctx.__aenter__() + read, write = self.client + + # Manually enter the ClientSession context + self.session_ctx = ClientSession(read, write) + self.session = await self.session_ctx.__aenter__() + + self.toolkit = MCPToolkit(session=self.session) + await self.toolkit.initialize() + + def get_tools_sync(self) -> List[BaseTool]: + # Now that session is open, just return tools directly + return self.toolkit.get_tools() + + def invoke_tool_sync(self, tool: BaseTool, input_data: Dict[str, Any]) -> Any: + try: + return self.loop.run_until_complete(tool.ainvoke(input_data)) + except Exception as e: + print(f"Error invoking tool {tool.name}: {e}") + return None + + def start(self): + asyncio.set_event_loop(self.loop) + 
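+        # Bind the manager's private event loop to this thread, then schedule the
+        # async client/session setup and drive it to completion so the rest of the
+        # API can stay synchronous.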
self._task = self.loop.create_task(self._start_async()) + self.loop.run_until_complete(self._task) + + def stop(self): + if self._task and not self._task.done(): + cleanup_task = self.loop.create_task(self._stop_async()) + self.loop.run_until_complete(cleanup_task) + self.loop.close() + + async def _stop_async(self): + # Exit contexts in the same task and loop they were entered + if self.session_ctx: + await self.session_ctx.__aexit__(None, None, None) + if self.client_ctx: + await self.client_ctx.__aexit__(None, None, None) + + + + +def file_system_tool(): + print(""" + +This is file_system_tool + + """) + + + manager = SyncInvocationManager(command="npx", args=["-y", "@modelcontextprotocol/server-filesystem", "/"]) + manager.start() + tool_manager = MCPToolManager(manager) + tools = tool_manager.load_tools() + print(tools) + return tools + + +def memory_tool(): + + print(""" + +This is memory_tool + + """) + + + manager = SyncInvocationManager(command="npx", args=["-y", "@modelcontextprotocol/server-memory"]) + manager.start() + tool_manager = MCPToolManager(manager) + tools = tool_manager.load_tools() + print(tools) + return tools + + +def playwright(): + + print(""" + +This is playwright + + """) + + manager = SyncInvocationManager(command="npx", args=["-y", "@executeautomation/playwright-mcp-server"]) + manager.start() + tool_manager = MCPToolManager(manager) + tools = tool_manager.load_tools() + print(tools) + return tools + + +def youtube_transcript(): + + print(""" + +This is youtube_transcript + + """) + + manager = SyncInvocationManager(command="npx", args=["-y", "@kimtaeyoon83/mcp-server-youtube-transcript"]) + manager.start() + tool_manager = MCPToolManager(manager) + tools = tool_manager.load_tools() + print(tools) + return tools + +def fetch(): + + print(""" + +This is fetch + + """) + + manager = SyncInvocationManager(command="uvx", args=["mcp-server-fetch"]) + manager.start() + tool_manager = MCPToolManager(manager) + tools = tool_manager.load_tools() + print(tools) + return tools + + + + +def websearch(): + + print(""" + +This is websearch + + """) + + + manager = SyncInvocationManager(command="npx", args=["-y", "@mzxrai/mcp-webresearch"]) + manager.start() + tool_manager = MCPToolManager(manager) + tools = tool_manager.load_tools() + print(tools) + return tools + + + +custom_mcp_severs_ = [] +previous_mcp_servers = [] + +loaded_mcp_servers = [] + +def custom_mcp_servers(): + + print("Custom MCP Servers") + global custom_mcp_severs_ + global previous_mcp_servers + global loaded_mcp_servers + if custom_mcp_severs_ == previous_mcp_servers: + print("Returning loaded mcp servers") + return loaded_mcp_servers + + else: + # The custom_mcp_servers_ list is like [{name: "file_system_tool", command:"npx", args:["-y", "@mzxrai/mcp-webresearch"]}, {name: "memory_tool", command:"npx", args:["-y", "@mzxrai/mcp-webresearch"]}] + # We shouldnt load same mcp server twice. 
For that we need to intersect the custom_mcp_servers_ and previous_mcp_servers + # and load only the difference + # This is to avoid loading the same mcp server twice + + # Get the names of the mcp servers that are already loaded + previous_mcp_server_names = [mcp_server["name"] for mcp_server in loaded_mcp_servers] + # Get the names of the mcp servers that are in the custom_mcp_servers_ list + custom_mcp_server_names = [mcp_server["name"] for mcp_server in custom_mcp_severs_] + # Get the names of the mcp servers that are not loaded + mcp_server_names_to_load = list(set(custom_mcp_server_names) - set(previous_mcp_server_names)) + + # Load the mcp servers that are not loaded + + for mcp_server in custom_mcp_severs_: + if mcp_server["name"] in mcp_server_names_to_load: + manager = SyncInvocationManager(command=mcp_server["command"], args=mcp_server["args"]) + manager.start() + tool_manager = MCPToolManager(manager) + tools = tool_manager.load_tools() + loaded_mcp_servers = loaded_mcp_servers + tools + previous_mcp_servers = custom_mcp_severs_ + print("Returning loaded mcp servers", loaded_mcp_servers) + return loaded_mcp_servers + +def add_custom_mcp_server(name: str, command: str, args: List[str]): + global custom_mcp_severs_ + print("****************\nAdding custom mcp server") + print(name, command, args) + custom_mcp_severs_.append({"name": name, "command": command, "args": args}) + +def remove_custom_mcp_server(name: str): + global custom_mcp_severs_ + custom_mcp_severs_ = [mcp_server for mcp_server in custom_mcp_severs_ if mcp_server["name"] != name] + +def get_custom_mcp_server(name: str): + global custom_mcp_severs_ + for mcp_server in custom_mcp_severs_: + if mcp_server["name"] == name: + return mcp_server + return None + + + + + + + + + + +the_tools_ = None +def mcp_tools(): + global the_tools_ + if the_tools_ is None: + #the_tools_ = file_system_tool() + the_tools_ = [] + return the_tools_ + custom_mcp_servers() \ No newline at end of file diff --git a/gpt_computer_agent/remote.py b/gpt_computer_agent/remote.py index 4deb698..0654db9 100644 --- a/gpt_computer_agent/remote.py +++ b/gpt_computer_agent/remote.py @@ -2,7 +2,7 @@ import requests import time -from upsonic import Tiger + the_upsonic_ = None @@ -12,6 +12,7 @@ def the_upsonic(): global the_upsonic_ if not the_upsonic_: + from upsonic import Tiger the_upsonic_ = Tiger() return the_upsonic_ @@ -22,7 +23,7 @@ def __init__(self, url): self.url = url if self.status != True: - raise Exception("The server is not running") + print("\n\nThe server is not running\n\n") def send_request(self, path, data, files=None, dont_error=False): try: @@ -55,11 +56,21 @@ def input(self, text: str, screen: bool = False, talk: bool = False) -> str: response = self.send_request("/input", data) return response["response"] + def request(self, request: str, response: str, screen: bool = False) -> str: + data = {"request": request, "response": response, "screen": str(screen).lower()} + response = self.send_request("/request", data) + return response["response"] + def just_screenshot(self) -> str: data = {} response = self.send_request("/screenshot", data) return response["response"] + def stop_server(self) -> str: + data = {} + response = self.send_request("/stop_server", data) + return True + def screenshot_to_memory(self) -> str: return self.just_screenshot() @@ -194,13 +205,52 @@ def save_openai_api_key(self, openai_api_key): response = self.send_request("/save_openai_api_key", data) return response["response"] + def save_user_id(self, user_id): + 
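+        # Persist the given user id on the server via the /save_user_id endpoint.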
data = {"user_id": user_id} + response = self.send_request("/save_user_id", data) + return response["response"] + + def save_aws_access_key_id(self, aws_access_key_id): + data = {"aws_access_key_id": aws_access_key_id} + response = self.send_request("/save_aws_access_key_id", data) + return response["response"] + def save_aws_secret_access_key(self, aws_secret_access_key): + data = {"aws_secret_access_key": aws_secret_access_key} + response = self.send_request("/save_aws_secret_access_key", data) + return response["response"] + + + def save_system_prompt(self, prompt): + data = {"prompt": prompt} + response = self.send_request("/save_system_prompt", data) + return response["response"] + + def save_anthropic_api_key(self, anthropic_api_key): + data = {"anthropic_api_key": anthropic_api_key} + response = self.send_request("/save_anthropic_api_key", data) + return response["response"] + def save_openai_url(self, openai_url): data = {"openai_url": openai_url} response = self.send_request("/save_openai_url", data) return response["response"] + + + + + def add_mcp_server(self, name, command, args): + data = {"name": name, "command": command, "args": args} + response = self.send_request("/add_mcp", data) + return response["response"] + + + def save_api_version(self, api_version): + data = {"api_version": api_version} + response = self.send_request("/save_api_version", data) + return response["response"] def save_model_settings(self, model_name): - data = {"model_name": model_name} + data = {"model_settings": model_name} response = self.send_request("/save_model_settings", data) return response["response"] @@ -232,6 +282,11 @@ def get_openai_models(self): response = self.send_request("/get_openai_models", data) return response["response"] + def get_azureai_models(self): + data = {} + response = self.send_request("/get_azureai_models", data) + return response["response"] + def get_ollama_models(self): data = {} response = self.send_request("/get_ollama_models", data) @@ -284,8 +339,22 @@ def train(self, url): response = self.send_request("/train", data) return response["response"] + + def mouse_scroll_down(self, amount): + data = {"amount": amount} + response = self.send_request("/mouse_scroll_down", data) + return response["response"] + + + def mouse_scroll_up(self, amount): + data = {"amount": amount} + response = self.send_request("/mouse_scroll_up", data) + return response["response"] + def wait(self, second): time.sleep(second) -remote = Remote_Client("http://localhost:7541") + + +#remote = Remote_Client("http://localhost:7541") \ No newline at end of file diff --git a/gpt_computer_agent/screen/shot.py b/gpt_computer_agent/screen/shot.py index 83cb118..125ef79 100644 --- a/gpt_computer_agent/screen/shot.py +++ b/gpt_computer_agent/screen/shot.py @@ -4,9 +4,11 @@ try: from ..gui.signal import signal_handler from ..utils.db import just_screenshot_path + from ..cu.computer import screenshot_action_ except ImportError: from gui.signal import signal_handler from utils.db import just_screenshot_path + from cu.computer import screenshot_action_ def encode_image(image_path): @@ -36,14 +38,13 @@ def take_screenshot(): This function takes a screenshot of the entire screen using pyautogui, saves it to the specified path, and emits a signal indicating that - the assistant is thinking. + the agent is thinking. 
     Returns:
     - None
     """
     try:
-        screenshot = pyautogui.screenshot()
-        screenshot.save(just_screenshot_path)
-        signal_handler.assistant_thinking.emit()
+        screenshot_action_(just_screenshot_path)
+        signal_handler.agent_thinking.emit()
     except Exception as e:
-        print(f"An error occurred while taking the screenshot: {e}")
+        print(f"An error occurred while taking the screenshot: {e}")
\ No newline at end of file
diff --git a/gpt_computer_agent/standard_tools.py b/gpt_computer_agent/standard_tools.py
index 794a7c0..4895403 100644
--- a/gpt_computer_agent/standard_tools.py
+++ b/gpt_computer_agent/standard_tools.py
@@ -104,16 +104,7 @@ def duckduckgo(query: str, max_number: int = 20) -> list:
         return "An exception occurred"
-@register_tool
-@wrapper
-def copy(text: str):
-    """
-    Copy the text to the clipboard.
-    """
-    import pyperclip
-    pyperclip.copy(text)
-    pyperclip.copy(text)
 @register_tool
@@ -131,7 +122,7 @@ def open_url(url) -> bool:
         return True
     except:
         return False
-    return False
+
 @register_tool
@@ -145,27 +136,7 @@ def sleep(seconds: int):
     time.sleep(seconds)
-@register_tool
-@wrapper
-def keyboard_write(text: str):
-    """
-    Write the text using the keyboard.
-    """
-    import pyautogui
-
-    pyautogui.write(text)
-
-
-@register_tool
-@wrapper
-def keyboard_press(key: str):
-    """
-    Press the key using the keyboard.
-    """
-    import pyautogui
-    pyautogui.press(key)
-    pyautogui.press(key)
 from langchain_experimental.utilities import PythonREPL
@@ -182,42 +153,26 @@ def python_repl(code: str) -> str:
     return the_py_client.run(code)
+
 @register_tool
 @wrapper
-def app_open(app_name: str) -> bool:
+def keyboard_write(text: str):
     """
-    Opens the native apps.
+    Write the text using the keyboard (uses pyautogui).
     """
-    try:
-        from AppOpener import open
-
-        open(app_name, throw_error=True)
-        return True
-    except:
-        try:
-            from MacAppOpener import open
-
-            open(app_name)
-        except:
-            return False
-
+    import pyautogui
+    pyautogui.write(text)
 @register_tool
 @wrapper
-def app_close(app_name: str) -> bool:
+def keyboard_press(key: str):
     """
-    Closes the native apps.
+    Press the key using the keyboard (uses pyautogui).
     """
-    try:
-        from AppOpener import close
+    import pyautogui
+    pyautogui.press(key)
+
-        close(app_name, throw_error=True)
-        return True
-    except:
-        try:
-            close(app_name)
-        except:
-            return False
 @register_tool
@@ -278,31 +234,28 @@ def connect_wifi(ssid: str, password: str) -> bool:
         return False
+
+import subprocess
+
+
 @register_tool
 @wrapper
-def ask_to_user(question: str, wait_for_answer: str = None) -> str:
+def run_terminal_command(command: str) -> str:
     """
-    Its ask to the user for your question and return the answer
+    Executes a terminal command and returns the result.
+    Args:
+        command (str): The command to run in the terminal.
+    Returns:
+        str: The output of the command.
""" try: - try: - from .agent.process import tts_if_you_can - from .audio.record import quick_speech_to_text - except: - from agent.process import tts_if_you_can - from audio.record import quick_speech_to_text - - print("TTS") - tts_if_you_can(question, bypass_other_settings=True, not_threaded=True) - print("TTS END") - - if wait_for_answer: - return quick_speech_to_text(wait_for_answer) - else: - return quick_speech_to_text() - except: - traceback.print_exc() - return False + import shlex + safe_command = shlex.split(command) + result = subprocess.run(safe_command, capture_output=True, text=True) + return result.stdout.strip() + except Exception as e: + return str(e) + def get_standard_tools(): @@ -312,4 +265,5 @@ def get_standard_tools(): if __name__ == "__main__": - print(ask_to_user("What is your age")) +# Deprecated function, removing: +# print(ask_to_user("What is your age")) \ No newline at end of file diff --git a/gpt_computer_agent/start.py b/gpt_computer_agent/start.py index ab30c4d..38d952e 100644 --- a/gpt_computer_agent/start.py +++ b/gpt_computer_agent/start.py @@ -3,18 +3,35 @@ import sys import webbrowser -from PyQt5.QtWidgets import QApplication -from PyQt5.QtGui import * -from PyQt5.QtWidgets import * -from PyQt5.QtCore import Qt -from pynput import keyboard + + + +def start_api(): + try: + from .api import start_api + + start_api(api=True) + except: + raise Exception( + "API could not be started, please install gpt-computer-agent[api]" + ) + + + def start(api=False): + if api: + return start_api() + from PyQt5.QtWidgets import QApplication + from PyQt5.QtGui import QIcon + from PyQt5.QtWidgets import QSystemTrayIcon, QMenu, QAction + from PyQt5.QtCore import Qt + from pynput import keyboard """ - Starts the computer assistant application. + Starts the computer agent application. - This function starts the computer assistant application, which includes parsing command-line arguments + This function starts the computer agent application, which includes parsing command-line arguments to set the profile, initializing the graphical user interface, and starting the application event loop. 
     Command-line Arguments:
@@ -201,4 +218,4 @@ def screenshot_and_microphone_connect():
     # Add the menu to the tray
     tray.setContextMenu(menu)
-    sys.exit(app.exec_())
+    sys.exit(app.exec_())
\ No newline at end of file
diff --git a/gpt_computer_agent/tracing.py b/gpt_computer_agent/tracing.py
new file mode 100644
index 0000000..e69de29
diff --git a/gpt_computer_agent/utils/chat_history.py b/gpt_computer_agent/utils/chat_history.py
new file mode 100644
index 0000000..f923287
--- /dev/null
+++ b/gpt_computer_agent/utils/chat_history.py
@@ -0,0 +1,220 @@
+
+from kot import KOT
+
+try:
+    from .folder import currently_dir, artifacts_dir, media_dir
+except ImportError:
+    from folder import currently_dir, artifacts_dir, media_dir
+
+from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
+
+try:
+    from .db import get_profile, load_system_prompt
+except ImportError:
+    from db import get_profile, load_system_prompt
+
+import time
+
+
+class Human:
+    def __init__(self, content, the_time, auto_delete: int = None):
+        self.that_was_empty = False
+
+        # Replace empty text payloads with a placeholder so nothing downstream sees "".
+        if isinstance(content, dict):
+            if "text" in content:
+                if content["text"] == "":
+                    self.that_was_empty = True
+                    content["text"] = "No response"
+
+        if isinstance(content, list):
+            for i in range(len(content)):
+                if "text" in content[i]:
+                    if content[i]["text"] == "":
+                        self.that_was_empty = True
+                        content[i]["text"] = "No response"
+
+        self.content = content
+        self.timestamp = the_time
+        self.auto_delete = auto_delete
+
+    def __dict__(self):
+        current_time = time.time()
+
+        if self.auto_delete is not None:
+            print(current_time, self.timestamp, self.auto_delete)
+            if current_time - self.timestamp > self.auto_delete:
+                return {"type": "human", "content": "This content was deleted.", "timestamp": self.timestamp, "auto_delete": self.auto_delete, "that_was_empty": self.that_was_empty}
+
+        return {"type": "human", "content": self.content, "timestamp": self.timestamp, "auto_delete": self.auto_delete, "that_was_empty": self.that_was_empty}
+
+
+class Agent:
+    def __init__(self, content, the_time):
+        self.that_was_empty = False
+
+        if isinstance(content, dict):
+            if "text" in content:
+                if content["text"] == "":
+                    self.that_was_empty = True
+                    content["text"] = "No response"
+
+        if isinstance(content, list):
+            for i in range(len(content)):
+                if "text" in content[i]:
+                    if content[i]["text"] == "":
+                        self.that_was_empty = True
+                        content[i]["text"] = "No response"
+
+        self.content = content
+        self.timestamp = the_time
+
+    def __dict__(self):
+        return {"type": "agent", "content": self.content, "timestamp": self.timestamp, "that_was_empty": self.that_was_empty}
+
+
+class System:
+    def __init__(self, content, the_time):
+        self.that_was_empty = False
+
+        if isinstance(content, dict):
+            if "text" in content:
+                if content["text"] == "":
+                    self.that_was_empty = True
+                    content["text"] = "No response"
+
+        if isinstance(content, list):
+            for i in range(len(content)):
+                if "text" in content[i]:
+                    if content[i]["text"] == "":
+                        self.that_was_empty = True
+                        content[i]["text"] = "No response"
+
+        self.content = content
+        self.timestamp = the_time
+
+    def __dict__(self):
+        return {"type": "system", "content": self.content, "timestamp": self.timestamp, "that_was_empty": self.that_was_empty}
+
+
+class ChatHistory:
+
+    def __init__(self):
+        self.chat_id = get_profile()
+        self.db = KOT(f"chat_history_{self.chat_id}", folder=artifacts_dir, enable_hashing=True)
+
+        if self.db.get("chat") is None:
+            self.db.set("chat", [])
+
+        if self.get_chat() == []:
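+            # Brand-new history: seed it with the system prompt so the model always
+            # receives its instructions as the first message.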
print("SETTING CHAT") + self.add_message("system", {"type":"text", "text": load_system_prompt()}) + + + def add_message(self, message_type:str, content, auto_delete:int=None): + + the_time = time.time() + + + if content == []: + content = {"type":"text", "text": "No response"} + + + if message_type == "human": + message = Human(content, the_time, auto_delete) + elif message_type == "agent": + print("AGENT", content) + message = Agent(content, the_time) + elif message_type == "system": + print("SYSTEM", content) + message = System(content, the_time) + else: + raise ValueError("Invalid message type") + + + chat = self.db.get("chat") + chat.append(message.__dict__()) + + + self.db.set("chat", chat) + + def get_chat(self): + chat = self.db.get("chat") + chat = sorted(chat, key=lambda x: x["timestamp"]) + + print("CHAT", chat) + # Transform dict to Message objects + + the_chat = [] + for message in chat: + if message["type"] == "human": + the_chat.append(Human(content=message["content"], the_time=message["timestamp"], auto_delete=message["auto_delete"])) + elif message["type"] == "agent": + the_chat.append(Agent(content=message["content"], the_time=message["timestamp"])) + elif message["type"] == "system": + the_chat.append(System(content=message["content"], the_time=message["timestamp"])) + + + last_chat = [] + for message in the_chat: + if message.that_was_empty: + continue + last_chat.append(message.__dict__()) + + + chat = last_chat + + langchain_messages = [] + + for message in chat: + + if isinstance(message["content"], tuple): + message["content"] = list(message["content"]) + if isinstance(message["content"], dict): + message["content"] = [message["content"]] + + + + + + + + + + if message["type"] == "human": + + langchain_messages.append(HumanMessage(content= + message["content"] + )) + elif message["type"] == "agent": + langchain_messages.append(AIMessage(content= + message["content"] + )) + elif message["type"] == "system": + langchain_messages.append(SystemMessage(content= + message["content"] + )) + + + return langchain_messages + + + def clear_chat(self): + self.db.set("chat", []) + + + + \ No newline at end of file diff --git a/gpt_computer_agent/utils/db.py b/gpt_computer_agent/utils/db.py index 67ccc97..70253dc 100644 --- a/gpt_computer_agent/utils/db.py +++ b/gpt_computer_agent/utils/db.py @@ -2,14 +2,23 @@ import uuid from dotenv import load_dotenv +try: + from .folder import currently_dir, artifacts_dir, media_dir + from .kot_db import kot_db_ + from .user_id import * +except: + from folder import currently_dir, artifacts_dir, media_dir + from kot_db import kot_db_ + from user_id import * + + + + load_dotenv(".env") -currently_dir = os.path.dirname(os.path.abspath(__file__)) -artifacts_dir = os.path.join(currently_dir, "artifacts") -media_dir = os.path.join(currently_dir, "media") -if not os.path.exists(artifacts_dir): - os.makedirs(artifacts_dir) + + mic_record_location = os.path.join(artifacts_dir, "mic_record.wav") system_sound_location = os.path.join(artifacts_dir, "system_sound.wav") @@ -37,43 +46,63 @@ def get_history_db(): return os.path.join(artifacts_dir, f"history_{the_profile}.db") -openaikey = os.path.join(artifacts_dir, "openaikey.db") - +# API KEY SAVING AND LOADING def save_api_key(api_key): - """Save the OpenAI API key to a file.""" - with open(openaikey, "w") as f: - f.write(api_key) + kot_db_.set("openai_api_key", api_key) +def load_api_key(): + if kot_db_.get("openai_api_key"): + return kot_db_.get("openai_api_key") + else: + env_variable = 
os.getenv("OPENAI_API_KEY") + if env_variable: + save_api_key(env_variable) + return env_variable + return "CHANGE_ME" +def save_anthropic_api_key(api_key): + kot_db_.set("anthropic_api_key", api_key) +def load_anthropic_api_key(): + if kot_db_.get("anthropic_api_key"): + return kot_db_.get("anthropic_api_key") + else: + env_variable = os.getenv("ANTHROPIC_API_KEY") + if env_variable: + save_anthropic_api_key(env_variable) + return env_variable + return "CHANGE_ME" -def load_api_key(): - """Load the OpenAI API key from a file or environment variables.""" - if not os.path.exists(openaikey): - env = os.getenv("OPENAI_API_KEY") - if env: - save_api_key(env) - return env - else: - return "CHANGE_ME" - with open(openaikey, "r") as f: - return f.read() +# OPENAI URL SAVING AND LOADING +def save_openai_url(url): + kot_db_.set("openai_url", url) +def load_openai_url(): + if kot_db_.get("openai_url"): + return kot_db_.get("openai_url") + else: + return "default" -openai_url_db = os.path.join(artifacts_dir, "openai_url.db") -def save_openai_url(url): - """Save the custom OpenAI base URL to a file.""" - with open(openai_url_db, "w") as f: - f.write(url) +def save_system_prompt(prompt): + kot_db_.set("system_prompt", prompt) +def load_system_prompt(): + if kot_db_.get("system_prompt"): + return kot_db_.get("system_prompt") + else: + return "Hi, you are an platform for vertical AI. You need to understant the user aspect and then trying to do these things and give valuation." -def load_openai_url(): - """Load the custom OpenAI base URL from a file.""" - if not os.path.exists(openai_url_db): - return "default" - with open(openai_url_db, "r") as f: - return f.read() + + +# API VERSION SAVING AND LOADING +def save_api_version(url): + kot_db_.set("api_version", url) +def load_api_version(): + if kot_db_.get("api_version"): + return kot_db_.get("api_version") + else: + return "CHANGE_ME" model_settings_db = os.path.join(artifacts_dir, "model_settings.db") @@ -158,23 +187,6 @@ def load_groq_api_key(): return f.read() -user_id_db = os.path.join(artifacts_dir, "user_id.db") - - -def save_user_id(): - """Save a unique user ID to a file.""" - with open(user_id_db, "w") as f: - uuid4 = str(uuid.uuid4()) - f.write(uuid4) - return uuid4 - - -def load_user_id(): - """Load the unique user ID from a file.""" - if not os.path.exists(user_id_db): - return save_user_id() - with open(user_id_db, "r") as f: - return f.read() collapse_setting = os.path.join(artifacts_dir, "collapse_setting.db") @@ -525,3 +537,24 @@ def load_location_setting(): return "right" with open(location_setting, "r") as f: return f.read() + + + +# OPENAI URL SAVING AND LOADING +def save_aws_access_key_id(id): + kot_db_.set("aws_access_key_id", id) +def load_aws_access_key_id(): + if kot_db_.get("aws_access_key_id"): + return kot_db_.get("aws_access_key_id") + else: + return "default" + + +# OPENAI URL SAVING AND LOADING +def save_aws_secret_access_key(key): + kot_db_.set("aws_secret_access_key", key) +def load_aws_secret_access_key(): + if kot_db_.get("aws_secret_access_key"): + return kot_db_.get("aws_secret_access_key") + else: + return "default" \ No newline at end of file diff --git a/gpt_computer_agent/utils/folder.py b/gpt_computer_agent/utils/folder.py new file mode 100644 index 0000000..06495f7 --- /dev/null +++ b/gpt_computer_agent/utils/folder.py @@ -0,0 +1,8 @@ +import os + +currently_dir = os.path.dirname(os.path.abspath(__file__)) +artifacts_dir = os.path.join(currently_dir, "artifacts") +media_dir = os.path.join(currently_dir, "media") + 
+if not os.path.exists(artifacts_dir):
+    os.makedirs(artifacts_dir)
\ No newline at end of file
diff --git a/gpt_computer_agent/utils/kot_db.py b/gpt_computer_agent/utils/kot_db.py
new file mode 100644
index 0000000..8544ffa
--- /dev/null
+++ b/gpt_computer_agent/utils/kot_db.py
@@ -0,0 +1,6 @@
+try:
+    from .folder import artifacts_dir  # import only what's needed
+except ImportError:
+    from folder import artifacts_dir
+from kot import KOT
+kot_db_ = KOT("gca", folder=artifacts_dir, enable_hashing=True)
\ No newline at end of file
diff --git a/gpt_computer_agent/utils/user_id.py b/gpt_computer_agent/utils/user_id.py
new file mode 100644
index 0000000..29c9af2
--- /dev/null
+++ b/gpt_computer_agent/utils/user_id.py
@@ -0,0 +1,35 @@
+import uuid
+import os
+
+try:
+    from .folder import currently_dir, artifacts_dir, media_dir
+except ImportError:
+    from folder import currently_dir, artifacts_dir, media_dir
+
+user_id_db = os.path.join(artifacts_dir, "user_id.db")
+
+
+def save_user_id():
+    """Save a unique user ID to a file."""
+    with open(user_id_db, "w") as f:
+        uuid4 = str(uuid.uuid4())
+        f.write(uuid4)
+        return uuid4
+
+
+def load_user_id():
+    """Load the unique user ID from a file."""
+    if not os.path.exists(user_id_db):
+        return save_user_id()
+    with open(user_id_db, "r") as f:
+        return f.read()
+
+
+def change_user_id(user_id):
+    """Change the unique user ID."""
+    with open(user_id_db, "w") as f:
+        f.write(user_id)
+        return user_id
\ No newline at end of file
diff --git a/gpt_computer_agent/version.py b/gpt_computer_agent/version.py
new file mode 100644
index 0000000..6aa6052
--- /dev/null
+++ b/gpt_computer_agent/version.py
@@ -0,0 +1,17 @@
+# Get the current directory, open __init__.py, and extract the version from it.
+
+# __version__ = '0.27.5'
+
+import os
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+def get_version():
+    with open(os.path.join(current_dir, "__init__.py")) as f:
+        for line in f:
+            if "__version__" in line:
+                the_str = line.split("=")[1].strip().strip("'")
+                the_str = the_str.replace("' # fmt: skip", "")
+                return the_str
+    return None
\ No newline at end of file
diff --git a/refactor.py b/refactor.py
index 5c8d2b7..08a76a9 100644
--- a/refactor.py
+++ b/refactor.py
@@ -1,26 +1,28 @@
 import os
+import sys
+import subprocess
 def install_refactor_tool():
-    os.system("pip install ruff==0.6.0")
+    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'ruff==0.6.0'])
 def refactor():
-    os.system("ruff check --fix")
-    os.system("ruff format")
+    subprocess.check_call(['ruff', 'check', '--fix'])
+    subprocess.check_call(['ruff', 'format'])
-def create_commit():
-    os.system("git add .")
-    os.system("git commit -m 'refactor: Scheduled refactoring'")
+def create_commit(version):
+    subprocess.check_call(['git', 'add', '.'])
+    subprocess.check_call(['git', 'commit', '-m', f'refactor: Scheduled refactoring {version}'])
 def push():
-    os.system("git push")
+    subprocess.check_call(['git', 'push'])
 if __name__ == "__main__":
     install_refactor_tool()
     refactor()
-    create_commit()
+    create_commit('0.28.3')
     push()
diff --git a/requirements.in b/requirements.in
index f20461f..0b09397 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,10 +1,11 @@
-langgraph==0.0.51
-pyqt5==5.15.10
+langgraph==0.2.56
+#pyqt5==5.15.10
+pyqt5==5.15.*
 scipy==1.13.1
 pygame==2.5.2
 soundcard==0.4.3
-openai==1.30.3
-langchain-google-genai==1.0.4
+openai==1.57.0
+langchain-google-genai==2.0.6
 python-dotenv==1.0.0
 upsonic==0.28.4
 pyautogui==0.9.54
 sounddevice==0.4.6
 soundfile==0.12.1
pydub==0.25.1 pyscreeze==0.1.30 pyperclip==1.8.2 -pydantic==2.7.2 +pydantic==2.10.3 pillow==10.3.0 langchainhub==0.1.18 -langchain-experimental==0.0.58 +langchain-experimental==0.3.3 opentelemetry-sdk==1.24.0 opentelemetry-exporter-otlp==1.24.0 -langchain-groq==0.1.5 -langchain-openai==0.1.6 -open-interpreter==0.2.6 -langchain==0.1.20 -langchain-community==0.0.38 -langchain-core==0.1.52 +langchain-groq==0.2.1 +langchain-openai==0.2.11 +langchain==0.3.10 +langchain-community==0.3.10 +langchain-core==0.3.22 # custom tools pyperclip==1.8.2 @@ -35,4 +35,29 @@ beautifulsoup4==4.12.3 pytesseract==0.3.10 pywifi-controls==0.7 -pynput==1.7.7 \ No newline at end of file +pynput==1.7.7 + +kot==0.1.2 + + +screeninfo==0.8.1 + +anthropic==0.40.0 +langchain-anthropic==0.3.0 + + + +StrEnum==0.4.15 + + +langchain-mcp==0.1.0a1 + + +waitress==3.0.2 + +langchain-aws==0.2.9 + +rich + +sentry-sdk==2.19.2 +sentry-sdk[opentelemetry] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 79ab216..28e49a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,598 +1,58 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile requirements.in -# -aiohttp==3.9.5 - # via - # langchain - # langchain-community - # litellm -langchain-google-genai==1.0.4 -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -ansicon==1.89.0 - # via jinxed -anyio==4.4.0 - # via - # groq - # httpx - # openai -astor==0.8.1 - # via open-interpreter -asttokens==2.4.1 - # via stack-data -async-timeout==4.0.3 - # via - # aiohttp - # langchain -attrs==23.2.0 - # via aiohttp -blessed==1.20.0 - # via inquirer -cachetools==5.3.3 - # via google-auth -certifi==2024.6.2 - # via - # httpcore - # httpx - # requests -cffi==1.16.0 - # via - # cryptography - # soundcard - # sounddevice - # soundfile -charset-normalizer==3.3.2 - # via requests -click==8.1.7 - # via - # litellm - # nltk -cloudpickle==3.0.0 - # via upsonic -colorama==0.4.6 - # via - # click - # ipython - # tqdm -comm==0.2.2 - # via ipykernel -contourpy==1.2.1 - # via matplotlib -cryptography==41.0.3 - # via upsonic -cycler==0.12.1 - # via matplotlib -dataclasses-json==0.6.7 - # via - # langchain - # langchain-community -debugpy==1.8.1 - # via ipykernel -decorator==5.1.1 - # via ipython -deprecated==1.2.14 - # via - # opentelemetry-api - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -dill==0.3.8 - # via upsonic -distro==1.9.0 - # via - # groq - # openai -editor==1.6.6 - # via inquirer -exceptiongroup==1.2.1 - # via - # anyio - # ipython -executing==2.0.1 - # via stack-data -filelock==3.14.0 - # via huggingface-hub -fire==0.5.0 - # via upsonic -fonttools==4.53.0 - # via matplotlib -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -fsspec==2024.6.0 - # via huggingface-hub -git-python==1.0.3 - # via open-interpreter -gitdb==4.0.11 - # via gitpython -gitpython==3.1.43 - # via git-python -google-ai-generativelanguage==0.6.4 - # via google-generativeai -google-api-core[grpc]==2.19.0 - # via - # google-ai-generativelanguage - # google-api-python-client - # google-generativeai -google-api-python-client==2.133.0 - # via google-generativeai -google-auth==2.30.0 - # via - # google-ai-generativelanguage - # google-api-core - # google-api-python-client - # google-auth-httplib2 - # google-generativeai -google-auth-httplib2==0.2.0 - # via google-api-python-client -google-generativeai==0.5.4 - # via langchain-google-genai -googleapis-common-protos==1.63.1 - # 
via - # google-api-core - # grpcio-status - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -greenlet==3.0.3 - # via sqlalchemy -groq==0.8.0 - # via langchain-groq -grpcio==1.64.1 - # via - # google-api-core - # grpcio-status - # opentelemetry-exporter-otlp-proto-grpc -grpcio-status==1.62.2 - # via google-api-core -h11==0.14.0 - # via httpcore -html2image==2.0.4.3 - # via open-interpreter -httpcore==1.0.5 - # via httpx -httplib2==0.22.0 - # via - # google-api-python-client - # google-auth-httplib2 -httpx==0.27.0 - # via - # groq - # openai -huggingface-hub==0.23.3 - # via tokenizers -idna==3.7 - # via - # anyio - # httpx - # requests - # yarl -importlib-metadata==7.0.0 - # via - # litellm - # opentelemetry-api -inquirer==3.2.4 - # via open-interpreter -ipykernel==6.29.4 - # via open-interpreter -ipython==8.25.0 - # via ipykernel -jedi==0.19.1 - # via ipython -jinja2==3.1.4 - # via litellm -jinxed==1.2.1 - # via blessed -joblib==1.4.2 - # via nltk -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -jupyter-client==8.6.2 - # via - # ipykernel - # open-interpreter -jupyter-core==5.7.2 - # via - # ipykernel - # jupyter-client -kiwisolver==1.4.5 - # via matplotlib -langchain==0.1.20 - # via - # -r requirements.in - # langchain-experimental -langchain-community==0.0.38 - # via - # -r requirements.in - # langchain -langchain-core==0.1.52 - # via - # -r requirements.in - # langchain - # langchain-community - # langchain-experimental - # langchain-google-genai - # langchain-groq - # langchain-openai - # langchain-text-splitters - # langgraph -langchain-experimental==0.0.58 - # via -r requirements.in -langchain-google-genai==1.0.4 - # via -r requirements.in -langchain-groq==0.1.5 - # via -r requirements.in -langchain-openai==0.1.6 - # via - # -r requirements.in - # upsonic -langchain-text-splitters==0.0.2 - # via langchain -langchainhub==0.1.18 - # via -r requirements.in -langgraph==0.0.51 - # via -r requirements.in -langsmith==0.1.76 - # via - # langchain - # langchain-community - # langchain-core -litellm==1.40.8 - # via open-interpreter -markdown-it-py==3.0.0 - # via rich -markupsafe==2.1.5 - # via jinja2 -marshmallow==3.21.3 - # via dataclasses-json -matplotlib==3.9.0 - # via open-interpreter -matplotlib-inline==0.1.7 - # via - # ipykernel - # ipython -mdurl==0.1.2 - # via markdown-it-py -memory-profiler==0.61.0 - # via upsonic -mgzip==0.2.1 - # via upsonic -mouseinfo==0.1.3 - # via pyautogui -multidict==6.0.5 - # via - # aiohttp - # yarl -mypy-extensions==1.0.0 - # via typing-inspect -nest-asyncio==1.6.0 - # via ipykernel -nltk==3.8.1 - # via open-interpreter -numpy==1.26.4 - # via - # contourpy - # langchain - # langchain-community - # matplotlib - # scipy - # soundcard -open-interpreter==0.2.6 - # via -r requirements.in -openai==1.30.3 - # via - # -r requirements.in - # langchain-openai - # litellm -opentelemetry-api==1.24.0 - # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http - # opentelemetry-sdk -opentelemetry-exporter-otlp==1.24.0 - # via -r requirements.in -opentelemetry-exporter-otlp-proto-common==1.24.0 - # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -opentelemetry-exporter-otlp-proto-grpc==1.24.0 - # via opentelemetry-exporter-otlp -opentelemetry-exporter-otlp-proto-http==1.24.0 - # via opentelemetry-exporter-otlp -opentelemetry-proto==1.24.0 - # via - # opentelemetry-exporter-otlp-proto-common - # opentelemetry-exporter-otlp-proto-grpc 
-    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.24.0
-    # via
-    #   -r requirements.in
-    #   opentelemetry-exporter-otlp-proto-grpc
-    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.45b0
-    # via opentelemetry-sdk
-
-packaging==23.2
-    # via
-    #   huggingface-hub
-    #   ipykernel
-    #   langchain-core
-    #   marshmallow
-    #   matplotlib
-parso==0.8.4
-    # via jedi
-pillow==10.3.0
-    # via
-    #   -r requirements.in
-    #   matplotlib
-    #   pyscreeze
-platformdirs==4.2.2
-    # via
-    #   jupyter-core
-    #   open-interpreter
-prompt-toolkit==3.0.47
-    # via ipython
-proto-plus==1.23.0
-    # via
-    #   google-ai-generativelanguage
-    #   google-api-core
-protobuf==4.25.3
-    # via
-    #   google-ai-generativelanguage
-    #   google-api-core
-    #   google-generativeai
-    #   googleapis-common-protos
-    #   grpcio-status
-    #   opentelemetry-proto
-    #   proto-plus
-psutil==5.9.8
-    # via
-    #   ipykernel
-    #   memory-profiler
-    #   open-interpreter
-pure-eval==0.2.2
-    # via stack-data
-pyasn1==0.6.0
-    # via
-    #   pyasn1-modules
-    #   rsa
-pyasn1-modules==0.4.0
-    # via google-auth
-pyautogui==0.9.54
-    # via -r requirements.in
-pycparser==2.22
-    # via cffi
-pydantic==2.7.2
-    # via
-    #   -r requirements.in
-    #   google-generativeai
-    #   groq
-    #   langchain
-    #   langchain-core
-    #   langsmith
-    #   open-interpreter
-    #   openai
-pydantic-core==2.18.3
-    # via pydantic
-pydub==0.25.1
-    # via -r requirements.in
-pygame==2.5.2
-    # via -r requirements.in
-pygetwindow==0.0.9
-    # via pyautogui
-pygments==2.18.0
-    # via
-    #   ipython
-    #   rich
-pymsgbox==1.0.9
-    # via pyautogui
-pyparsing==3.1.2
-    # via
-    #   httplib2
-    #   matplotlib
-pyperclip==1.8.2
-    # via
-    #   -r requirements.in
-    #   mouseinfo
-pyqt5==5.15.10
-    # via -r requirements.in
-#pyqt5-qt5==5.15.2 any compatible version
-pyqt5-qt5~=5.15.2
-    # via pyqt5
-pyqt5-sip==12.13.0
-    # via pyqt5
-pyreadline3==3.4.1
-    # via open-interpreter
-pyrect==0.2.0
-    # via pygetwindow
-pyscreeze==0.1.30
-    # via
-    #   -r requirements.in
-    #   pyautogui
-python-dateutil==2.9.0.post0
-    # via
-    #   jupyter-client
-    #   matplotlib
-python-dotenv==1.0.0
-    # via
-    #   -r requirements.in
-    #   litellm
-    #   upsonic
-pytweening==1.2.0
-    # via pyautogui
-#pywin32==306
-    # via jupyter-core
-pyyaml==6.0.1
-    # via
-    #   huggingface-hub
-    #   langchain
-    #   langchain-community
-    #   langchain-core
-    #   open-interpreter
-pyzmq==26.0.3
-    # via
-    #   ipykernel
-    #   jupyter-client
-readchar==4.1.0
-    # via inquirer
-regex==2024.5.15
-    # via
-    #   nltk
-    #   tiktoken
-requests==2.32.3
-    # via
-    #   google-api-core
-    #   html2image
-    #   huggingface-hub
-    #   langchain
-    #   langchain-community
-    #   langchainhub
-    #   langsmith
-    #   litellm
-    #   opentelemetry-exporter-otlp-proto-http
-    #   tiktoken
-    #   upsonic
-rich==13.7.1
-    # via
-    #   open-interpreter
-    #   upsonic
-rsa==4.9
-    # via google-auth
-runs==1.2.2
-    # via editor
+langgraph==0.2.56
+#pyqt5==5.15.10
+pyqt5==5.15.*
 scipy==1.13.1
-    # via -r requirements.in
-send2trash==1.8.3
-    # via open-interpreter
-six==1.16.0
-    # via
-    #   asttokens
-    #   blessed
-    #   fire
-    #   open-interpreter
-    #   python-dateutil
-smmap==5.0.1
-    # via gitdb
-sniffio==1.3.1
-    # via
-    #   anyio
-    #   groq
-    #   httpx
-    #   openai
+pygame==2.5.2
 soundcard==0.4.3
-    # via -r requirements.in
+openai==1.57.0
+langchain-google-genai==2.0.6
+python-dotenv==1.0.0
+upsonic==0.28.4
+pyautogui==0.9.54
 sounddevice==0.4.6
-    # via -r requirements.in
 soundfile==0.12.1
-    # via -r requirements.in
-sqlalchemy==2.0.30
-    # via
-    #   langchain
-    #   langchain-community
-stack-data==0.6.3
-    # via ipython
-tenacity==8.3.0
-    # via
-    #   langchain
-    #   langchain-community
-    #   langchain-core
-termcolor==2.4.0
-    # via fire
-tiktoken==0.6.0
-    # via
-    #   langchain-openai
-    #   litellm
-    #   open-interpreter
-    #   tokentrim
-tokenizers==0.19.1
-    # via litellm
-tokentrim==0.1.13
-    # via open-interpreter
-toml==0.10.2
-    # via open-interpreter
-tornado==6.4.1
-    # via
-    #   ipykernel
-    #   jupyter-client
-tqdm==4.66.4
-    # via
-    #   google-generativeai
-    #   huggingface-hub
-    #   nltk
-    #   openai
-traitlets==5.14.3
-    # via
-    #   comm
-    #   ipykernel
-    #   ipython
-    #   jupyter-client
-    #   jupyter-core
-    #   matplotlib-inline
-types-requests==2.32.0.20240602
-    # via langchainhub
-typing-extensions==4.12.2
-    # via
-    #   anyio
-    #   google-generativeai
-    #   groq
-    #   huggingface-hub
-    #   ipython
-    #   openai
-    #   opentelemetry-sdk
-    #   pydantic
-    #   pydantic-core
-    #   sqlalchemy
-    #   typing-inspect
-typing-inspect==0.9.0
-    # via dataclasses-json
-upsonic==0.28.4
-    # via -r requirements.in
-uritemplate==4.1.1
-    # via google-api-python-client
-urllib3==2.2.1
-    # via
-    #   requests
-    #   types-requests
-uuid6==2024.1.12
-    # via langgraph
-wcwidth==0.2.13
-    # via
-    #   blessed
-    #   prompt-toolkit
-websocket-client==1.8.0
-    # via html2image
-wget==3.2
-    # via open-interpreter
-wrapt==1.16.0
-    # via deprecated
-xmod==1.8.1
-    # via
-    #   editor
-    #   runs
-yarl==1.9.4
-    # via aiohttp
-zipp==3.19.2
-    # via importlib-metadata
-
-# The following packages are considered to be unsafe in a requirements file:
-# setuptools
-
-
+pydub==0.25.1
+pyscreeze==0.1.30
+pyperclip==1.8.2
+pydantic==2.10.3
+pillow==10.3.0
+langchainhub==0.1.18
+langchain-experimental==0.3.3
+opentelemetry-sdk==1.24.0
+opentelemetry-exporter-otlp==1.24.0
+langchain-groq==0.2.1
+langchain-openai==0.2.11
+langchain==0.3.10
+langchain-community==0.3.10
+langchain-core==0.3.22
-# default tools
+# custom tools
 pyperclip==1.8.2
 google==3.0.0
 duckduckgo-search==5.3.0
 beautifulsoup4==4.12.3
 pytesseract==0.3.10
-
 pywifi-controls==0.7
-pynput==1.7.7
\ No newline at end of file
+pynput==1.7.7
+
+kot==0.1.2
+
+
+screeninfo==0.8.1
+
+anthropic==0.40.0
+langchain-anthropic==0.3.0
+
+StrEnum==0.4.15
+
+langchain-mcp==0.1.0a1
+
+waitress==3.0.2
+
+langchain-aws==0.2.9
+rich
+
+sentry-sdk==2.19.2
+sentry-sdk[opentelemetry]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 06572f5..0cfdcfc 100644
--- a/setup.py
+++ b/setup.py
@@ -1,60 +1,63 @@
 #!/usr/bin/python3
 # -*- coding: utf-8 -*-
-from setuptools import setup, find_packages
+from setuptools import setup
 import platform

 # Read the requirements from the requirements.txt file
 with open("requirements.txt") as fp:
-    base_requirements = fp.read().splitlines()
+    install_requires = fp.read().splitlines()

 # Platform-specific dependencies
 if platform.system() == "Windows":
-    base_requirements.append("AppOpener==1.7")
-
+    install_requires.append("AppOpener==1.7")
 elif platform.system() == "Darwin":  # Darwin is the system name for macOS
-    base_requirements.append("MacAppOpener==0.0.5")
-
-# Optional dependencies
-extras_require = {
-    "agentic": ["crewai==0.86.0"],
-    "wakeword": ["pvporcupine", "pyaudio"],
-    "api": ["flask==3.0.3"],
-    "local_tts": [
-        "tensorflow==2.18.0",
-        "datasets[audio]==2.20.0",
-        "sentencepiece==0.2.0",
-        "torch==2.4.0",
-        "transformers==4.43.3",
-    ],
-    "local_stt": ["openai-whisper==20231117"],
-}
+    install_requires.append("MacAppOpener==0.0.5")  # Replace with actual macOS specific package

-# Merge base requirements with extras
-extras_require["default"] = base_requirements
-extras_require["base"] = base_requirements
 setup(
     name="gpt_computer_agent",
-    version="0.23.0",
-    description="GPT Computer Agent - A multi-purpose agent system",
system", + version="0.28.3", + description="GPT Computer Agent", long_description=open("README.md", encoding="utf-8").read(), long_description_content_type="text/markdown", url="https://github.com/khulnasoft/gpt-computer-agent", author="KhulnaSoft DevOps", author_email="info@khulnasoft.com", license="MIT", - packages=find_packages( - include=["client", "client.*", "server", "server.*", "shared", "shared.*"] - ), + packages=[ + "gpt_computer_agent", + "gpt_computer_agent.agent", + "gpt_computer_agent.cu", + "gpt_computer_agent.mcp", + "gpt_computer_agent.gui", + "gpt_computer_agent.screen", + "gpt_computer_agent.utils", + "gpt_computer_agent.audio", + "gpt_computer_agent.audio.tts_providers", + "gpt_computer_agent.audio.stt_providers", + ], include_package_data=True, - install_requires=base_requirements, - extras_require=extras_require, + install_requires=install_requires, entry_points={ - "console_scripts": [ - "computeragent=client.agent:start" - ], # Adjusted for new structure + "console_scripts": ["computeragent=gpt_computer_agent.start:start"], }, python_requires=">=3.9", zip_safe=False, -) + extras_require={ + "base": install_requires, + "default": install_requires, + "agentic": ["crewai==0.30.11"], + "wakeword": ["pvporcupine", "pyaudio"], + "api": ["flask==3.0.3"], + "display": ["opencv-python"], + "local_tts": [ + "tensorflow==2.17.0", + "datasets[audio]==2.20.0", + "sentencepiece==0.2.0", + "torch==2.4.0", + "transformers==4.43.3", + ], + "local_stt": ["openai-whisper==20231117"], + }, +) \ No newline at end of file