-
Notifications
You must be signed in to change notification settings - Fork 9
feat(RHOAIENG-33702): Update LMEval configuration to support environment variable for Kubernetes usage #58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -923,27 +923,40 @@ class LMEval(Eval, BenchmarksProtocolPrivate): | |
|
|
||
| def __init__(self, config: LMEvalEvalProviderConfig): | ||
| self._config = config | ||
| self._namespace: str | None = None | ||
|
|
||
| self._namespace = _resolve_namespace(self._config) | ||
|
|
||
| logger.debug("LMEval provider initialized with namespace: %s", self._namespace) | ||
| logger.debug("LMEval provider config values: %s", vars(self._config)) | ||
| self.benchmarks: dict[str, Benchmark] = {} | ||
| self._jobs: list[Job] = [] | ||
| self._job_metadata: dict[str, dict[str, Any]] = {} | ||
|
|
||
| self._k8s_client: k8s_client.ApiClient | None = None | ||
| self._k8s_custom_api: k8s_client.CustomObjectsApi | None = None | ||
| if self.use_k8s: | ||
| self._cr_builder: LMEvalCRBuilder | None = None | ||
|
|
||
| def _ensure_k8s_initialized(self): | ||
| """Ensure Kubernetes client and namespace are initialized when needed.""" | ||
| if not self.use_k8s: | ||
| logger.warning("Non-K8s evaluation backend is not implemented yet") | ||
|
Comment on lines
+939
to
+940
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: Logging a warning for non-K8s backend may be redundant given the subsequent NotImplementedError. Consider removing the warning in _ensure_k8s_initialized, as NotImplementedError is raised elsewhere if use_k8s is False. Alternatively, update the warning to clarify where the error will occur. Suggested implementation: def _ensure_k8s_initialized(self):
"""Ensure Kubernetes client and namespace are initialized when needed."""
if not self.use_k8s:
returnIf you want to clarify in the log that the error will be raised elsewhere, you could replace the warning with: logger.warning("use_k8s is False; NotImplementedError will be raised in subsequent calls.")But the cleanest approach is simply to remove the warning as shown above. |
||
| return | ||
|
|
||
| if self._k8s_client is None: | ||
| self._init_k8s_client() | ||
| logger.debug( | ||
| "Initialized Kubernetes client with namespace: %s", self._namespace | ||
| ) | ||
|
|
||
| if self._namespace is None: | ||
| self._namespace = _resolve_namespace(self._config) | ||
| logger.debug("LMEval provider resolved namespace: %s", self._namespace) | ||
|
|
||
| if self._cr_builder is None: | ||
| self._cr_builder = LMEvalCRBuilder( | ||
| namespace=self._namespace, | ||
| service_account=getattr(self._config, "service_account", None), | ||
| ) | ||
| self._cr_builder._config = self._config | ||
| logger.debug( | ||
| "Initialized Kubernetes client and CR builder with namespace: %s", | ||
| self._namespace, | ||
| ) | ||
|
|
||
| def _init_k8s_client(self): | ||
| """Initialize the Kubernetes client.""" | ||
|
|
@@ -1048,7 +1061,8 @@ def _deploy_lmeval_cr(self, cr: dict, job_id: str) -> None: | |
| pvc_name = None | ||
|
|
||
| if ( | ||
| self._cr_builder._config is not None | ||
| self._cr_builder is not None | ||
| and self._cr_builder._config is not None | ||
| and hasattr(self._cr_builder._config, "metadata") | ||
| and self._cr_builder._config.metadata | ||
| ): | ||
|
|
@@ -1198,6 +1212,7 @@ async def run_eval( | |
| Returns: | ||
| Dict containing job_id for evaluation tracking | ||
| """ | ||
| self._ensure_k8s_initialized() | ||
| if not self.use_k8s: | ||
| raise NotImplementedError("Non-K8s evaluation not implemented yet") | ||
|
|
||
|
|
@@ -1224,6 +1239,11 @@ async def run_eval( | |
| benchmark_config.metadata["input"]["storage"], | ||
| ) | ||
|
|
||
| if self._cr_builder is None: | ||
| raise LMEvalConfigError( | ||
| "CR builder not initialized - ensure K8s is properly configured" | ||
| ) | ||
|
|
||
| cr = self._cr_builder.create_cr( | ||
| benchmark_id=benchmark_id, | ||
| task_config=benchmark_config, | ||
|
|
@@ -1295,6 +1315,7 @@ async def evaluate_rows( | |
| Returns: | ||
| EvaluateResponse: Object containing generations and scores | ||
| """ | ||
| self._ensure_k8s_initialized() | ||
| if not self.use_k8s: | ||
| raise NotImplementedError("Non-K8s evaluation not implemented yet") | ||
|
|
||
|
|
@@ -1323,6 +1344,7 @@ async def job_status(self, benchmark_id: str, job_id: str) -> dict[str, str] | N | |
| Returns: | ||
| Dict with current status of the job | ||
| """ | ||
| self._ensure_k8s_initialized() | ||
| if not self.use_k8s: | ||
| raise NotImplementedError("Non-K8s evaluation not implemented yet") | ||
|
|
||
|
|
@@ -1395,6 +1417,7 @@ async def job_cancel(self, benchmark_id: str, job_id: str) -> None: | |
| benchmark_id: The benchmark identifier | ||
| job_id: The job identifier | ||
| """ | ||
| self._ensure_k8s_initialized() | ||
| if not self.use_k8s: | ||
| raise NotImplementedError("Non-K8s evaluation not implemented yet") | ||
|
|
||
|
|
@@ -1568,6 +1591,7 @@ async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: | |
| Returns: | ||
| EvaluateResponse: Results of the evaluation | ||
| """ | ||
| self._ensure_k8s_initialized() | ||
| if not self.use_k8s: | ||
| return EvaluateResponse( | ||
| generations=[], | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
question: Removing the check for use_k8s disables early error reporting for unsupported backends.
This change may result in less informative error messages, as misconfigured backends will fail later at runtime instead of during configuration validation.