-
Notifications
You must be signed in to change notification settings - Fork 45
feat: Hyperparameter Optimization APIs in Kubeflow SDK #124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
65e4dea
2bd1540
778c6a9
f7f7aba
192299c
bf0b93a
cdec3b9
5e7d131
55a89fa
14c1497
1353fc9
a1bcab9
50d743f
57c0a40
6ca385e
85c63f4
a044087
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ jobs: | |
| ci | ||
| docs | ||
| examples | ||
| optimizer | ||
| scripts | ||
| test | ||
| trainer | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| # Copyright 2025 The Kubeflow Authors. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| # The default Kubernetes namespace. | ||
| DEFAULT_NAMESPACE = "default" | ||
|
|
||
| # How long to wait in seconds for requests to the Kubernetes API Server. | ||
| DEFAULT_TIMEOUT = 120 | ||
|
|
||
| # Unknown indicates that the value can't be identified. | ||
| UNKNOWN = "Unknown" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| # Copyright 2025 The Kubeflow Authors. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import os | ||
| from typing import Optional | ||
|
|
||
| from kubernetes import config | ||
|
|
||
| from kubeflow.common import constants | ||
|
|
||
|
|
||
def is_running_in_k8s() -> bool:
    """Return True when the Kubernetes secrets directory exists on the local filesystem.

    The check only tests for the directory ``/var/run/secrets/kubernetes.io/``; it does
    not contact the Kubernetes API Server.
    """
    secrets_dir = "/var/run/secrets/kubernetes.io/"
    return os.path.isdir(secrets_dir)
|
|
||
|
|
||
def get_default_target_namespace(context: Optional[str] = None) -> str:
    """Resolve the namespace to use when the caller did not specify one.

    When ``is_running_in_k8s()`` is False, the namespace is read from the kubeconfig:
    from the entry named ``context`` when it is given and found, otherwise from the
    current context. Any failure during that lookup falls back to
    ``constants.DEFAULT_NAMESPACE``. When ``is_running_in_k8s()`` is True, the namespace
    is read from the service account namespace file.

    Args:
        context: Optional kubeconfig context name whose namespace should be used.

    Returns:
        The resolved namespace name.
    """
    if is_running_in_k8s():
        with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace") as f:
            # Strip surrounding whitespace so that a trailing newline in the file
            # never becomes part of the namespace name.
            return f.readline().strip()

    try:
        all_contexts, current_context = config.list_kube_config_contexts()
        # If context is set, we should get namespace from it.
        if context:
            for c in all_contexts:
                if isinstance(c, dict) and c.get("name") == context:
                    return c["context"]["namespace"]
        # Otherwise, try to get namespace from the current context.
        return current_context["context"]["namespace"]
    except Exception:
        # Deliberate best-effort: a missing or unreadable kubeconfig, or a context
        # without a "namespace" key, resolves to the default namespace.
        return constants.DEFAULT_NAMESPACE
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| # Copyright 2025 The Kubeflow Authors. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| # Import common types. | ||
| from kubeflow.common.types import KubernetesBackendConfig | ||
|
|
||
| # Import the Kubeflow Optimizer client. | ||
| from kubeflow.optimizer.api.optimizer_client import OptimizerClient | ||
|
|
||
| # Import the Kubeflow Optimizer types. | ||
| from kubeflow.optimizer.types.algorithm_types import RandomSearch | ||
| from kubeflow.optimizer.types.optimization_types import Objective, OptimizationJob, TrialConfig | ||
| from kubeflow.optimizer.types.search_types import Search | ||
|
|
||
| # Import the Kubeflow Trainer types. | ||
| from kubeflow.trainer.types.types import TrainJobTemplate | ||
|
|
||
| __all__ = [ | ||
| "KubernetesBackendConfig", | ||
| "Objective", | ||
| "OptimizationJob", | ||
| "OptimizerClient", | ||
| "RandomSearch", | ||
| "Search", | ||
| "TrainJobTemplate", | ||
| "TrialConfig", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,126 @@ | ||||||
| # Copyright 2025 The Kubeflow Authors. | ||||||
| # | ||||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
| # you may not use this file except in compliance with the License. | ||||||
| # You may obtain a copy of the License at | ||||||
| # | ||||||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||||||
| # | ||||||
| # Unless required by applicable law or agreed to in writing, software | ||||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
| # See the License for the specific language governing permissions and | ||||||
| # limitations under the License. | ||||||
|
|
||||||
| import logging | ||||||
| from typing import Any, Optional | ||||||
|
|
||||||
| from kubeflow.common.types import KubernetesBackendConfig | ||||||
| from kubeflow.optimizer.backends.kubernetes.backend import KubernetesBackend | ||||||
| from kubeflow.optimizer.types.algorithm_types import RandomSearch | ||||||
| from kubeflow.optimizer.types.optimization_types import Objective, OptimizationJob, TrialConfig | ||||||
| from kubeflow.trainer.types.types import TrainJobTemplate | ||||||
|
|
||||||
| logger = logging.getLogger(__name__) | ||||||
|
|
||||||
|
|
||||||
| class OptimizerClient: | ||||||
| def __init__( | ||||||
| self, | ||||||
| backend_config: Optional[KubernetesBackendConfig] = None, | ||||||
| ): | ||||||
| """Initialize a Kubeflow Optimizer client. | ||||||
|
|
||||||
| Args: | ||||||
| backend_config: Backend configuration. Either KubernetesBackendConfig or None to use | ||||||
| default config class. Defaults to KubernetesBackendConfig. | ||||||
|
|
||||||
| Raises: | ||||||
| ValueError: Invalid backend configuration. | ||||||
|
|
||||||
| """ | ||||||
| # Set the default backend config. | ||||||
| if not backend_config: | ||||||
| backend_config = KubernetesBackendConfig() | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Nit: just for consistency, shall we match trainer and use the same import style?
    if not backend_config:
        backend_config = common_types.KubernetesBackendConfig()
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Let me go the other way around, though. |
||||||
|
|
||||||
| if isinstance(backend_config, KubernetesBackendConfig): | ||||||
| self.backend = KubernetesBackend(backend_config) | ||||||
| else: | ||||||
| raise ValueError(f"Invalid backend config '{backend_config}'") | ||||||
|
|
||||||
| def optimize( | ||||||
| self, | ||||||
| trial_template: TrainJobTemplate, | ||||||
| *, | ||||||
| trial_config: Optional[TrialConfig] = None, | ||||||
| search_space: dict[str, Any], | ||||||
| objectives: Optional[list[Objective]] = None, | ||||||
| algorithm: Optional[RandomSearch] = None, | ||||||
|
||||||
| algorithm: Optional[RandomSearch] = None, | |
| algorithm: Optional[BaseAlgorithm] = None, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall we add GridSearch here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch!