diff --git a/runpod/__init__.py b/runpod/__init__.py index c084a037..fbf30e1b 100644 --- a/runpod/__init__.py +++ b/runpod/__init__.py @@ -13,7 +13,7 @@ get_gpu, get_gpus, get_pod, get_pods, create_pod, stop_pod, resume_pod, terminate_pod, create_template, - create_endpoint + get_endpoints, create_endpoint, update_endpoint_template ) from .cli.groups.config.functions import set_credentials, check_credentials, get_credentials diff --git a/runpod/api/ctl_commands.py b/runpod/api/ctl_commands.py index 8f681b44..3d2e2803 100644 --- a/runpod/api/ctl_commands.py +++ b/runpod/api/ctl_commands.py @@ -9,6 +9,7 @@ from .mutations import user as user_mutations from .queries import gpus from .queries import pods as pod_queries +from .queries import endpoints as endpoint_queries from .graphql import run_graphql_query from .mutations import pods as pod_mutations from .mutations import endpoints as endpoint_mutations @@ -228,6 +229,14 @@ def create_template( return raw_response["data"]["saveTemplate"] +def get_endpoints() -> dict: + ''' + Get all endpoints + ''' + raw_return = run_graphql_query(endpoint_queries.QUERY_ENDPOINT) + cleaned_return = raw_return["data"]["myself"]["endpoints"] + return cleaned_return + def create_endpoint( name:str, template_id:str, gpu_ids:str="AMPERE_16", network_volume_id:str=None, locations:str=None, @@ -262,3 +271,25 @@ def create_endpoint( ) return raw_response["data"]["saveEndpoint"] + + +def update_endpoint_template( + endpoint_id:str, template_id:str +): + ''' + Update an endpoint template + + :param endpoint_id: the id of the endpoint + :param template_id: the id of the template to use for the endpoint + + :example: + + >>> endpoint_id = runpod.update_endpoint_template("test", "template_id") + ''' + raw_response = run_graphql_query( + endpoint_mutations.update_endpoint_template_mutation( + endpoint_id, template_id + ) + ) + + return raw_response["data"]["updateEndpointTemplate"] diff --git a/runpod/api/mutations/endpoints.py b/runpod/api/mutations/endpoints.py index 503e94dc..85603340 100644 --- a/runpod/api/mutations/endpoints.py +++ b/runpod/api/mutations/endpoints.py @@ -57,3 +57,25 @@ def generate_endpoint_mutation( }} }} """ + +def update_endpoint_template_mutation( + endpoint_id: str, template_id: str +): + """ Generate a string for a GraphQL mutation to update an existing endpoint's template. """ + input_fields = [] + + # ------------------------------ Required Fields ----------------------------- # + input_fields.append(f'templateId: "{template_id}"') + input_fields.append(f'endpointId: "{endpoint_id}"') + + # Format the input fields into a string + input_fields_string = ", ".join(input_fields) + result = f""" + mutation {{ + updateEndpointTemplate(input: {{{input_fields_string}}}) {{ + id + templateId + }} + }} + """ + return result \ No newline at end of file diff --git a/runpod/api/queries/endpoints.py b/runpod/api/queries/endpoints.py new file mode 100644 index 00000000..70c1d9b1 --- /dev/null +++ b/runpod/api/queries/endpoints.py @@ -0,0 +1,34 @@ +QUERY_ENDPOINT=""" +query Query { + myself { + endpoints { + aiKey + gpuIds + id + idleTimeout + name + networkVolumeId + locations + scalerType + scalerValue + templateId + type + userId + version + workersMax + workersMin + workersStandby + gpuCount + env { + key + value + } + createdAt + networkVolume { + id + dataCenterId + } + } + } +} +""" \ No newline at end of file diff --git a/runpod/cli/groups/project/functions.py b/runpod/cli/groups/project/functions.py index fbc30ff6..ef42ebf5 100644 --- a/runpod/cli/groups/project/functions.py +++ b/runpod/cli/groups/project/functions.py @@ -9,10 +9,10 @@ import tomlkit from tomlkit import document, comment, table, nl -from runpod import __version__, get_pod, create_template, create_endpoint +from runpod import __version__, get_pod, create_template, create_endpoint, update_endpoint_template, get_endpoints from runpod.cli import BASE_DOCKER_IMAGE, GPU_TYPES, ENV_VARS from runpod.cli.utils.ssh_cmd import SSHConnection -from .helpers import get_project_pod, copy_template_files, attempt_pod_launch, load_project_config +from .helpers import get_project_pod, copy_template_files, attempt_pod_launch, load_project_config, get_project_endpoint from ...utils.rp_sync import sync_directory STARTER_TEMPLATES = os.path.join(os.path.dirname(__file__), 'starter_templates') @@ -96,6 +96,38 @@ def create_new_project(project_name, runpod_volume_id, cuda_version, python_vers with open(os.path.join(project_folder, "runpod.toml"), 'w', encoding="UTF-8") as config_file: tomlkit.dump(toml_config, config_file) +def launch_pod(): + config = load_project_config() # Load runpod.toml + + print("Deploying development pod on RunPod...") + + # Prepare the environment variables + environment_variables = {"RUNPOD_PROJECT_ID": config["project"]["uuid"]} + for variable in config['project'].get('env_vars', {}): + environment_variables[variable] = config['project']['env_vars'][variable] + + # Prepare the GPU types + selected_gpu_types = config['project'].get('gpu_types', []) + if config['project'].get('gpu', None): + selected_gpu_types.append(config['project']['gpu']) + + # Attempt to launch a pod with the given configuration + new_pod = attempt_pod_launch(config, environment_variables) + if new_pod is None: + print("Selected GPU types unavailable, try again later or use a different type.") + return + + print("Waiting for pod to come online... ", end="") + sys.stdout.flush() + + # Wait for the pod to come online + while new_pod.get('desiredStatus', None) != 'RUNNING' or new_pod.get('runtime') is None: + new_pod = get_pod(new_pod['id']) + + project_pod_id = new_pod['id'] + + print(f"Project {config['project']['name']} pod ({project_pod_id}) created.", end="\n\n") + return project_pod_id # ------------------------------- Start Project ------------------------------ # def start_project(): # pylint: disable=too-many-locals, too-many-branches @@ -124,63 +156,37 @@ def start_project(): # pylint: disable=too-many-locals, too-many-branches # Check if the project pod already exists, if not create it. if not project_pod_id: - - print("Deploying development pod on RunPod...") - - # Prepare the environment variables - environment_variables = {"RUNPOD_PROJECT_ID": config["project"]["uuid"]} - for variable in config['project'].get('env_vars', {}): - environment_variables[variable] = config['project']['env_vars'][variable] - - # Prepare the GPU types - selected_gpu_types = config['project'].get('gpu_types', []) - if config['project'].get('gpu', None): - selected_gpu_types.append(config['project']['gpu']) - - # Attempt to launch a pod with the given configuration - new_pod = attempt_pod_launch(config, environment_variables) - if new_pod is None: - print("Selected GPU types unavailable, try again later or use a different type.") - return - - print("Waiting for pod to come online... ", end="") - sys.stdout.flush() - - # Wait for the pod to come online - while new_pod.get('desiredStatus', None) != 'RUNNING' or new_pod.get('runtime') is None: - new_pod = get_pod(new_pod['id']) - - project_pod_id = new_pod['id'] - - print(f"Project {config['project']['name']} pod ({project_pod_id}) created.", end="\n\n") + project_pod_id = launch_pod() with SSHConnection(project_pod_id) as ssh_conn: project_path_uuid = f'{config["project"]["volume_mount_path"]}/{config["project"]["uuid"]}' - remote_project_path = os.path.join(project_path_uuid, config["project"]["name"]) + project_path_uuid_dev = os.path.join(project_path_uuid, 'dev') + project_path_uuid_prod = os.path.join(project_path_uuid, 'prod') + remote_project_path = os.path.join(project_path_uuid_dev, config["project"]["name"]) # Create the project folder on the pod print(f'Checking pod project folder: {remote_project_path} on pod {project_pod_id}') - ssh_conn.run_commands([f'mkdir -p {remote_project_path}']) + ssh_conn.run_commands([f'mkdir -p {remote_project_path} {project_path_uuid_prod}']) # Copy local files to the pod project folder print(f'Syncing files to pod {project_pod_id}') - ssh_conn.rsync(os.getcwd(), project_path_uuid) + ssh_conn.rsync(os.getcwd(), project_path_uuid_dev) # Create the virtual environment - venv_path = os.path.join(project_path_uuid, "venv") + venv_path = os.path.join(project_path_uuid_dev, "venv") print(f'Activating Python virtual environment: {venv_path} on pod {project_pod_id}') commands = [ f'python{config["runtime"]["python_version"]} -m venv {venv_path}', f'source {venv_path}/bin/activate &&' f'cd {remote_project_path} &&' 'python -m pip install --upgrade pip &&' - f'python -m pip install --requirement {config["runtime"]["requirements_path"]}' + f'python -m pip install -v --requirement {config["runtime"]["requirements_path"]}' ] ssh_conn.run_commands(commands) # Start the watcher and then start the API development server - sync_directory(ssh_conn, os.getcwd(), project_path_uuid) + sync_directory(ssh_conn, os.getcwd(), project_path_uuid_dev) project_name = config["project"]["name"] pip_req_path = os.path.join(remote_project_path, config['runtime']['requirements_path']) @@ -217,14 +223,14 @@ def start_project(): # pylint: disable=too-many-locals, too-many-branches trap cleanup EXIT SIGINT - if source {project_path_uuid}/venv/bin/activate; then + if source {project_path_uuid_dev}/venv/bin/activate; then echo -e "- Activated virtual environment." else echo "Failed to activate virtual environment." exit 1 fi - if cd {project_path_uuid}/{project_name}; then + if cd {project_path_uuid_dev}/{project_name}; then echo -e "- Changed to project directory." else echo "Failed to change directory." @@ -287,35 +293,75 @@ def start_project(): # pylint: disable=too-many-locals, too-many-branches # ------------------------------ Deploy Project ------------------------------ # def create_project_endpoint(): """ Create a project endpoint. + - Move code in dev to prod folder + - TODO: git commit the diff from current state to new state - Create a serverless template for the project - Create a new endpoint using the template """ config = load_project_config() + project_id = config['project']['uuid'] + project_pod_id = get_project_pod(project_id) + + # Check if the project pod already exists, if not create it. + if not project_pod_id: + project_pod_id = launch_pod() + + with SSHConnection(project_pod_id) as ssh_conn: + project_path_uuid = f'{config["project"]["volume_mount_path"]}/{config["project"]["uuid"]}' + project_path_uuid_prod = os.path.join(project_path_uuid, 'prod') + remote_project_path = os.path.join(project_path_uuid_prod, config["project"]["name"]) + # Copy local files to the pod project folder + ssh_conn.run_commands([f'mkdir -p {remote_project_path}']) + print(f'Syncing files to pod {project_pod_id} prod') + ssh_conn.rsync(os.getcwd(), project_path_uuid_prod) + # Create the virtual environment + venv_path = os.path.join(project_path_uuid_prod, 'venv') + print(f'Activating Python virtual environment: {venv_path} on pod {project_pod_id}') + commands = [ + f'python{config["runtime"]["python_version"]} -m venv {venv_path}', + f'source {venv_path}/bin/activate &&' + f'cd {remote_project_path} &&' + 'python -m pip install --upgrade pip &&' + f'python -m pip install -v --requirement {config["runtime"]["requirements_path"]}' + ] + ssh_conn.run_commands(commands) + ssh_conn.close() + environment_variables = {} for variable in config['project']['env_vars']: environment_variables[variable] = config['project']['env_vars'][variable] # Construct the docker start command docker_start_cmd_prefix = 'bash -c "' - activate_cmd = f'. /runpod-volume/{config["project"]["uuid"]}/venv/bin/activate' - python_cmd = f'python -u /runpod-volume/{config["project"]["uuid"]}/{config["project"]["name"]}/{config["runtime"]["handler_path"]}' # pylint: disable=line-too-long + activate_cmd = f'. /runpod-volume/{config["project"]["uuid"]}/prod/venv/bin/activate' + python_cmd = f'python -u /runpod-volume/{config["project"]["uuid"]}/prod/{config["project"]["name"]}/{config["runtime"]["handler_path"]}' # pylint: disable=line-too-long docker_start_cmd_suffix = '"' docker_start_cmd = docker_start_cmd_prefix + activate_cmd + ' && ' + \ python_cmd + docker_start_cmd_suffix # pylint: disable=line-too-long - + + project_name_unique = str(uuid.uuid4())[:8] #project name unique constraint project_endpoint_template = create_template( - name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]}', + name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]} | {project_name_unique}', image_name=config['project']['base_image'], container_disk_in_gb=config['project']['container_disk_size_gb'], docker_start_cmd=docker_start_cmd, env=environment_variables, is_serverless=True ) - deployed_endpoint = create_endpoint( - name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]}', - template_id=project_endpoint_template['id'], - network_volume_id=config['project']['storage_id'], - ) + deployed_endpoint = get_project_endpoint(project_id) + if not deployed_endpoint: + deployed_endpoint = create_endpoint( + name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]}', + template_id=project_endpoint_template['id'], + network_volume_id=config['project']['storage_id'], + ) + else: + deployed_endpoint = update_endpoint_template( + endpoint_id=deployed_endpoint['id'], + template_id=project_endpoint_template['id'], + ) + + #does user want to tear down and recreate workers immediately? return deployed_endpoint['id'] diff --git a/runpod/cli/groups/project/helpers.py b/runpod/cli/groups/project/helpers.py index 0e8e33c1..3de08481 100644 --- a/runpod/cli/groups/project/helpers.py +++ b/runpod/cli/groups/project/helpers.py @@ -7,7 +7,7 @@ import click import tomlkit -from runpod import get_pods, create_pod +from runpod import get_pods, create_pod, get_endpoints from runpod import error as rp_error def validate_project_name(name): @@ -29,6 +29,16 @@ def get_project_pod(project_id: str): return None +def get_project_endpoint(project_id: str): + """Check if a project endpoint exists. + Return the endpoint_id if it exists, else return None. + """ + for endpoint in get_endpoints(): + if project_id in endpoint['name']: + return endpoint + + return None + def copy_template_files(template_dir, destination): """Copy the template files to the destination directory.""" for item in os.listdir(template_dir): diff --git a/runpod/cli/utils/ssh_cmd.py b/runpod/cli/utils/ssh_cmd.py index 5225e772..e23ca6ec 100644 --- a/runpod/cli/utils/ssh_cmd.py +++ b/runpod/cli/utils/ssh_cmd.py @@ -131,6 +131,7 @@ def rsync(self, local_path, remote_path, quiet=False): return subprocess.run(rsync_cmd, check=True) + def close(self): ''' Close the SSH connection. ''' self.ssh.close()