Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/hyperpod_cli/commands/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
TEMP_KUBE_CONFIG_FILE,
OutputFormat,
)
from hyperpod_cli.telemetry import _hyperpod_telemetry_emitter
from hyperpod_cli.telemetry.constants import Feature
from hyperpod_cli.telemetry.user_agent import (
get_user_agent_extra_suffix,
)
Expand Down Expand Up @@ -107,6 +109,7 @@
multiple=True,
help="Optional. The namespace that you want to check the capacity for. Only SageMaker managed namespaces are supported.",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.get_clusters_cli")
def get_clusters(
region: Optional[str],
orchestrator: Optional[str],
Expand Down Expand Up @@ -463,6 +466,7 @@ def _aggregate_nodes_info(
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.connect_cluster_cli")
def connect_cluster(
cluster_name: str,
region: Optional[str],
Expand Down
26 changes: 16 additions & 10 deletions src/hyperpod_cli/commands/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
Volume,
USER_NAME_LABEL_KEY,
)
from hyperpod_cli.telemetry import _hyperpod_telemetry_emitter
from hyperpod_cli.telemetry.constants import Feature
from hyperpod_cli.clients.kubernetes_client import (
KubernetesClient,
)
Expand Down Expand Up @@ -124,6 +126,7 @@
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.get_job_cli")
def get_job(
job_name: str,
namespace: Optional[str],
Expand All @@ -144,9 +147,8 @@ def get_job(
result = get_training_job_service.get_training_job(job_name, namespace, verbose)
click.echo(result)
except Exception as e:
sys.exit(
f"Unexpected error happens when trying to get training job {job_name} : {e}"
)
logger.error(f"Unexpected error happens when trying to get training job {job_name} : {e}")
raise


@click.command()
Expand Down Expand Up @@ -186,6 +188,7 @@ def get_job(
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.list_jobs_cli")
def list_jobs(
namespace: Optional[str],
all_namespaces: Optional[bool],
Expand All @@ -205,7 +208,8 @@ def list_jobs(
)
click.echo(result)
except Exception as e:
sys.exit(f"Unexpected error happens when trying to list training job : {e}")
logger.error(f"Unexpected error happens when trying to list training job : {e}")
raise


@click.command()
Expand All @@ -228,6 +232,7 @@ def list_jobs(
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.list_pods_cli")
def list_pods(
job_name: str,
namespace: Optional[str],
Expand All @@ -246,9 +251,8 @@ def list_pods(
result = list_pods_service.list_pods_for_training_job(job_name, namespace, True)
click.echo(result)
except Exception as e:
sys.exit(
f"Unexpected error happens when trying to list pods for training job {job_name} : {e}"
)
logger.error(f"Unexpected error happens when trying to list pods for training job {job_name} : {e}")
raise


@click.command()
Expand All @@ -271,6 +275,7 @@ def list_pods(
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.cancel_job_cli")
def cancel_job(
job_name: str,
namespace: Optional[str],
Expand All @@ -287,9 +292,8 @@ def cancel_job(
result = cancel_training_job_service.cancel_training_job(job_name, namespace)
click.echo(result)
except Exception as e:
sys.exit(
f"Unexpected error happens when trying to cancel training job {job_name} : {e}"
)
logger.error(f"Unexpected error happens when trying to cancel training job {job_name} : {e}")
raise


@click.command()
Expand Down Expand Up @@ -536,6 +540,7 @@ def cancel_job(
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.start_job_cli")
def start_job(
config_file: Optional[str],
job_name: Optional[str],
Expand Down Expand Up @@ -876,6 +881,7 @@ def start_job(
help="Optional. The namespace to use. If not specified, this command will first use the namespace wh connecting the cluster."
"Otherwise if namespace is not configured when connecting to the cluster, a namespace that is managed by SageMaker will be auto discovered.",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.patch_job_cli")
def patch_job(patch_type: str, job_name: str, namespace: Optional[str]):

if patch_type not in JobPatchType.get_values():
Expand Down
14 changes: 8 additions & 6 deletions src/hyperpod_cli/commands/pod.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
setup_logger,
set_logging_level,
)
from hyperpod_cli.telemetry import _hyperpod_telemetry_emitter
from hyperpod_cli.telemetry.constants import Feature

logger = setup_logger(__name__)

Expand Down Expand Up @@ -54,6 +56,7 @@
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.get_log_cli")
def get_log(
job_name: str,
pod: str,
Expand All @@ -73,9 +76,8 @@ def get_log(
)
click.echo(result)
except Exception as e:
sys.exit(
f"Unexpected error happens when trying to get logs for training job {job_name} : {e}"
)
logger.error(f"Unexpected error happens when trying to get logs for training job {job_name} : {e}")
raise

try:
cloudwatch_link = get_logs_service.generate_cloudwatch_link(pod, namespace=namespace)
Expand Down Expand Up @@ -148,6 +150,7 @@ def invoke(self, ctx):
is_flag=True,
help="Enable debug mode",
)
@_hyperpod_telemetry_emitter(Feature.HYPERPOD_V2, "hyperpod_v2.exec_cli")
def exec(
job_name: str,
namespace: Optional[str],
Expand All @@ -173,6 +176,5 @@ def exec(
)
click.echo(result)
except Exception as e:
sys.exit(
f"Unexpected error happens when trying to exec command for pod {pod} : {e}"
)
logger.error(f"Unexpected error happens when trying to exec command for pod {pod} : {e}")
raise
2 changes: 2 additions & 0 deletions src/hyperpod_cli/telemetry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import
from .telemetry_logging import _hyperpod_telemetry_emitter
60 changes: 60 additions & 0 deletions src/hyperpod_cli/telemetry/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import absolute_import
from enum import Enum


class Feature(Enum):
    """Telemetry feature identifiers.

    Each member pairs a feature name with its integer code. Converting a
    member with ``str()`` yields the bare member name (for example
    ``"HYPERPOD_V2"``) instead of the default ``"Feature.HYPERPOD_V2"``.
    """

    HYPERPOD_V2 = 10

    def __str__(self) -> str:  # pylint: disable=E0307
        """Render the member as its name only, without the class prefix."""
        return self.name


class Status(Enum):
    """Outcome codes recorded in telemetry.

    ``SUCCESS`` maps to 1 and ``FAILURE`` maps to 0. Converting a member
    with ``str()`` yields the bare member name (for example ``"SUCCESS"``).
    """

    SUCCESS = 1
    FAILURE = 0

    def __str__(self) -> str:  # pylint: disable=E0307
        """Render the member as its name only, without the class prefix."""
        return self.name


class Region(str, Enum):
    """AWS regions recognised by telemetry.

    Members are ``str`` subclasses, so each one compares equal to its
    region code (``Region.US_EAST_1 == "us-east-1"``) and can be looked up
    from that code with ``Region("us-east-1")``. The trailing comment on
    each member is the three-letter code carried alongside it in this list.
    """

    # --- Classic regions ---
    US_EAST_1 = "us-east-1"  # IAD
    US_EAST_2 = "us-east-2"  # CMH
    US_WEST_1 = "us-west-1"  # SFO
    US_WEST_2 = "us-west-2"  # PDX
    AP_NORTHEAST_1 = "ap-northeast-1"  # NRT
    AP_NORTHEAST_2 = "ap-northeast-2"  # ICN
    AP_NORTHEAST_3 = "ap-northeast-3"  # KIX
    AP_SOUTH_1 = "ap-south-1"  # BOM
    AP_SOUTHEAST_1 = "ap-southeast-1"  # SIN
    AP_SOUTHEAST_2 = "ap-southeast-2"  # SYD
    CA_CENTRAL_1 = "ca-central-1"  # YUL
    EU_CENTRAL_1 = "eu-central-1"  # FRA
    EU_NORTH_1 = "eu-north-1"  # ARN
    EU_WEST_1 = "eu-west-1"  # DUB
    EU_WEST_2 = "eu-west-2"  # LHR
    EU_WEST_3 = "eu-west-3"  # CDG
    SA_EAST_1 = "sa-east-1"  # GRU

    # --- Opt-in regions ---
    AP_EAST_1 = "ap-east-1"  # HKG
    AP_SOUTHEAST_3 = "ap-southeast-3"  # CGK
    AF_SOUTH_1 = "af-south-1"  # CPT
    EU_SOUTH_1 = "eu-south-1"  # MXP
    ME_SOUTH_1 = "me-south-1"  # BAH
    MX_CENTRAL_1 = "mx-central-1"  # QRO
    AP_SOUTHEAST_7 = "ap-southeast-7"  # BKK
    AP_SOUTH_2 = "ap-south-2"  # HYD
    AP_SOUTHEAST_4 = "ap-southeast-4"  # MEL
    EU_CENTRAL_2 = "eu-central-2"  # ZRH
    EU_SOUTH_2 = "eu-south-2"  # ZAZ
    IL_CENTRAL_1 = "il-central-1"  # TLV
    ME_CENTRAL_1 = "me-central-1"  # DXB
Loading