From e1bcb15e4b62c86087eac491c98563a644ce9999 Mon Sep 17 00:00:00 2001 From: Hritik003 Date: Mon, 14 Apr 2025 22:55:59 +0530 Subject: [PATCH 01/12] Add override option in eval Signed-off-by: Hritik003 --- src/aiq/cli/commands/evaluate.py | 18 ++++++++++++++++++ src/aiq/eval/config.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/aiq/cli/commands/evaluate.py b/src/aiq/cli/commands/evaluate.py index 429f9c87d..22aeafea6 100644 --- a/src/aiq/cli/commands/evaluate.py +++ b/src/aiq/cli/commands/evaluate.py @@ -21,6 +21,8 @@ from aiq.eval.evaluate import EvaluationRun from aiq.eval.evaluate import EvaluationRunConfig +from aiq.cli.cli_utils.config_override import load_and_override_config +from aiq.data_models.config import AIQConfig logger = logging.getLogger(__name__) @@ -77,6 +79,12 @@ default=1, help="Number of repetitions for the evaluation.", ) +@click.option( + "--override", + type=(str, str), + multiple=True, + help="Override config values using dot notation (e.g., --override llms.nim_llm.temperature 0.7)", +) @click.pass_context def eval_command(ctx, **kwargs) -> None: """ Evaluate datasets with the specified mechanism""" @@ -84,6 +92,14 @@ def eval_command(ctx, **kwargs) -> None: async def run_and_evaluate(config: EvaluationRunConfig): + from aiq.runtime.loader import discover_and_register_plugins, PluginTypes + + # Register plugins before validation + discover_and_register_plugins(PluginTypes.ALL) + + # Apply overrides (validates that config is now correct) + _ = load_and_override_config(config.config_file, config.override) + # Run evaluation eval_runner = EvaluationRun(config=config) await eval_runner.run_and_evaluate() @@ -101,6 +117,7 @@ def process_aiq_eval( endpoint: str, endpoint_timeout: int, reps: int, + override: tuple[tuple[str, str], ...], ): """ Process the eval command and execute the evaluation. 
Here the config_file, if provided, is checked for its existence @@ -127,5 +144,6 @@ def process_aiq_eval( endpoint=endpoint, endpoint_timeout=endpoint_timeout, reps=reps, + override=override, ) asyncio.run(run_and_evaluate(config)) diff --git a/src/aiq/eval/config.py b/src/aiq/eval/config.py index da10cde0d..c71c585a8 100644 --- a/src/aiq/eval/config.py +++ b/src/aiq/eval/config.py @@ -30,7 +30,7 @@ class EvaluationRunConfig(BaseModel): endpoint: str | None = None # only used when running the workflow remotely endpoint_timeout: int = 300 reps: int = 1 - + override: tuple[tuple[str, str], ...] = () class EvaluationRunOutput(BaseModel): """ From 594f91fbd6e32fc66ca9734d138ea8975ca40cda Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 22 Apr 2025 14:49:11 -0700 Subject: [PATCH 02/12] Only load config_object plugins This is for consistency with the start commands Signed-off-by: Anuradha Karuppiah --- src/aiq/cli/commands/evaluate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aiq/cli/commands/evaluate.py b/src/aiq/cli/commands/evaluate.py index 22aeafea6..e34f467a8 100644 --- a/src/aiq/cli/commands/evaluate.py +++ b/src/aiq/cli/commands/evaluate.py @@ -19,10 +19,9 @@ import click +from aiq.cli.cli_utils.config_override import load_and_override_config from aiq.eval.evaluate import EvaluationRun from aiq.eval.evaluate import EvaluationRunConfig -from aiq.cli.cli_utils.config_override import load_and_override_config -from aiq.data_models.config import AIQConfig logger = logging.getLogger(__name__) @@ -92,10 +91,11 @@ def eval_command(ctx, **kwargs) -> None: async def run_and_evaluate(config: EvaluationRunConfig): - from aiq.runtime.loader import discover_and_register_plugins, PluginTypes + from aiq.runtime.loader import PluginTypes + from aiq.runtime.loader import discover_and_register_plugins # Register plugins before validation - discover_and_register_plugins(PluginTypes.ALL) + 
discover_and_register_plugins(PluginTypes.CONFIG_OBJECT) # Apply overrides (validates that config is now correct) _ = load_and_override_config(config.config_file, config.override) From 32d3fef361b5626d71005d5a7fbf2f333d908b77 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 22 Apr 2025 15:10:05 -0700 Subject: [PATCH 03/12] Add a note in the evaluate README for override option use Signed-off-by: Anuradha Karuppiah --- docs/source/guides/evaluate.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/guides/evaluate.md b/docs/source/guides/evaluate.md index 5c61fc271..9b1d772a8 100644 --- a/docs/source/guides/evaluate.md +++ b/docs/source/guides/evaluate.md @@ -46,6 +46,7 @@ eval: The dataset section specifies the dataset to use for running the workflow. The dataset can be of type `json`, `jsonl`, `csv`, `xls`, or `parquet`. The dataset file path is specified using the `file_path` key. + ## Understanding the Dataset Format The dataset file provides a list of questions and expected answers. The following is an example of a dataset file: @@ -232,6 +233,14 @@ You can also evaluate workflows via the AgentIQ evaluation endpoint. The evaluat ## Adding Custom Evaluators You can add custom evaluators to evaluate the workflow output. To add a custom evaluator, you need to implement the evaluator and register it with the AgentIQ evaluator system. See the [Custom Evaluator](custom-evaluator.md) documentation for more information. +## Overriding Evaluation Configuration +You can override the configuration in the `eval_config.yml` file using the `--override` command line flag. 
The following is an example of overriding the configuration: +```bash +aiq eval --config_file examples/simple/configs/eval_config.yml \ + --override llms.nim_llm.temperature 0.7 \ + --override llms.nim_llm.model_name meta/llama-3.3-70b-instruct +``` + ## Additional Evaluation Options For details on other evaluators and evaluation options, refer to [AgentIQ Evaluation Concepts](../concepts/evaluate.md) for more information. From 30addb6afac84355ab1d2118b97b1ffff5a21352 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 22 Apr 2025 15:28:21 -0700 Subject: [PATCH 04/12] Use the overridden config for evaluation Signed-off-by: Anuradha Karuppiah --- src/aiq/cli/commands/evaluate.py | 4 ---- src/aiq/eval/evaluate.py | 8 ++++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/aiq/cli/commands/evaluate.py b/src/aiq/cli/commands/evaluate.py index e34f467a8..408755360 100644 --- a/src/aiq/cli/commands/evaluate.py +++ b/src/aiq/cli/commands/evaluate.py @@ -19,7 +19,6 @@ import click -from aiq.cli.cli_utils.config_override import load_and_override_config from aiq.eval.evaluate import EvaluationRun from aiq.eval.evaluate import EvaluationRunConfig @@ -97,9 +96,6 @@ async def run_and_evaluate(config: EvaluationRunConfig): # Register plugins before validation discover_and_register_plugins(PluginTypes.CONFIG_OBJECT) - # Apply overrides (validates that config is now correct) - _ = load_and_override_config(config.config_file, config.override) - # Run evaluation eval_runner = EvaluationRun(config=config) await eval_runner.run_and_evaluate() diff --git a/src/aiq/eval/evaluate.py b/src/aiq/eval/evaluate.py index 4b59f8353..4ccbb45b6 100644 --- a/src/aiq/eval/evaluate.py +++ b/src/aiq/eval/evaluate.py @@ -233,10 +233,14 @@ async def run_and_evaluate(self, logger.info("Starting evaluation run with config file: %s", self.config.config_file) from aiq.builder.eval_builder import WorkflowEvalBuilder + from aiq.cli.cli_utils.config_override import 
load_and_override_config from aiq.runtime.loader import load_config - # Load the config object - config = load_config(self.config.config_file) + # Load and override the config + if self.config.override: + config = load_and_override_config(self.config.config_file, self.config.override) + else: + config = load_config(self.config.config_file) self.eval_config = config.eval logger.debug("Loaded evaluation configuration: %s", self.eval_config) From df5db5618d06c44033f7ffcee0e3964d5b919624 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 22 Apr 2025 16:16:03 -0700 Subject: [PATCH 05/12] Fix overrides handler Signed-off-by: Anuradha Karuppiah --- docs/source/guides/evaluate.md | 4 ++-- src/aiq/eval/evaluate.py | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/docs/source/guides/evaluate.md b/docs/source/guides/evaluate.md index 9b1d772a8..f708b183b 100644 --- a/docs/source/guides/evaluate.md +++ b/docs/source/guides/evaluate.md @@ -237,8 +237,8 @@ You can add custom evaluators to evaluate the workflow output. To add a custom e You can override the configuration in the `eval_config.yml` file using the `--override` command line flag. 
The following is an example of overriding the configuration: ```bash aiq eval --config_file examples/simple/configs/eval_config.yml \ - --override llms.nim_llm.temperature 0.7 \ - --override llms.nim_llm.model_name meta/llama-3.3-70b-instruct + --override llms.nim_rag_eval_llm.temperature 0.7 \ + --override llms.nim_rag_eval_llm.model_name meta/llama-3.1-70b-instruct ``` ## Additional Evaluation Options diff --git a/src/aiq/eval/evaluate.py b/src/aiq/eval/evaluate.py index 4ccbb45b6..57ab3600d 100644 --- a/src/aiq/eval/evaluate.py +++ b/src/aiq/eval/evaluate.py @@ -224,6 +224,20 @@ async def run_evaluators(self, evaluators: dict[str, Any]): logger.exception("An error occurred while running evaluators: %s", e, exc_info=True) raise + def apply_overrides(self): + from aiq.cli.cli_utils.config_override import load_and_override_config + from aiq.data_models.config import AIQConfig + from aiq.runtime.loader import PluginTypes + from aiq.runtime.loader import discover_and_register_plugins + from aiq.utils.data_models.schema_validator import validate_schema + + # Register plugins before validation + discover_and_register_plugins(PluginTypes.CONFIG_OBJECT) + + config_dict = load_and_override_config(self.config.config_file, self.config.override) + config = validate_schema(config_dict, AIQConfig) + return config + async def run_and_evaluate(self, session_manager: AIQSessionManager | None = None, job_id: str | None = None) -> EvaluationRunOutput: @@ -233,12 +247,11 @@ async def run_and_evaluate(self, logger.info("Starting evaluation run with config file: %s", self.config.config_file) from aiq.builder.eval_builder import WorkflowEvalBuilder - from aiq.cli.cli_utils.config_override import load_and_override_config from aiq.runtime.loader import load_config # Load and override the config if self.config.override: - config = load_and_override_config(self.config.config_file, self.config.override) + config = self.apply_overrides() else: config = load_config(self.config.config_file) 
self.eval_config = config.eval From 3ca7ef9d34bcc65302caf12300a64801eb3326af Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 22 Apr 2025 16:18:36 -0700 Subject: [PATCH 06/12] Revert whitespace change Signed-off-by: Anuradha Karuppiah --- docs/source/guides/evaluate.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/guides/evaluate.md b/docs/source/guides/evaluate.md index f708b183b..a169871cb 100644 --- a/docs/source/guides/evaluate.md +++ b/docs/source/guides/evaluate.md @@ -46,7 +46,6 @@ eval: The dataset section specifies the dataset to use for running the workflow. The dataset can be of type `json`, `jsonl`, `csv`, `xls`, or `parquet`. The dataset file path is specified using the `file_path` key. - ## Understanding the Dataset Format The dataset file provides a list of questions and expected answers. The following is an example of a dataset file: From 68e2321543c8d4efe769e34cb591145f796516af Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 22 Apr 2025 16:21:01 -0700 Subject: [PATCH 07/12] Drop duplicate plugin discovery Signed-off-by: Anuradha Karuppiah --- src/aiq/cli/commands/evaluate.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/aiq/cli/commands/evaluate.py b/src/aiq/cli/commands/evaluate.py index 408755360..ab72c840a 100644 --- a/src/aiq/cli/commands/evaluate.py +++ b/src/aiq/cli/commands/evaluate.py @@ -90,12 +90,6 @@ def eval_command(ctx, **kwargs) -> None: async def run_and_evaluate(config: EvaluationRunConfig): - from aiq.runtime.loader import PluginTypes - from aiq.runtime.loader import discover_and_register_plugins - - # Register plugins before validation - discover_and_register_plugins(PluginTypes.CONFIG_OBJECT) - # Run evaluation eval_runner = EvaluationRun(config=config) await eval_runner.run_and_evaluate() From fd53deed24f6a94446e47811c534a38ff19c4af0 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 22 Apr 2025 16:32:40 -0700 Subject: [PATCH 08/12] Fixes from precommit run -a 
Signed-off-by: Anuradha Karuppiah --- src/aiq/eval/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aiq/eval/config.py b/src/aiq/eval/config.py index c71c585a8..9af5e8f9f 100644 --- a/src/aiq/eval/config.py +++ b/src/aiq/eval/config.py @@ -30,7 +30,8 @@ class EvaluationRunConfig(BaseModel): endpoint: str | None = None # only used when running the workflow remotely endpoint_timeout: int = 300 reps: int = 1 - override: tuple[tuple[str, str], ...] = () + override: tuple[tuple[str, str], ...] = () + class EvaluationRunOutput(BaseModel): """ From c6e89da4ac3ab516063e1d4331686111d9eaf174 Mon Sep 17 00:00:00 2001 From: Hritik Raj Date: Thu, 1 May 2025 11:12:59 +0530 Subject: [PATCH 09/12] Removed pylint unused-argument Signed-off-by: Hritik Raj --- src/aiq/cli/commands/evaluate.py | 2 +- src/aiq/utils/data_models/schema_validator.py | 2 +- src/aiq/utils/exception_handlers/schemas.py | 2 +- tests/aiq/cli/test_type_registry.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aiq/cli/commands/evaluate.py b/src/aiq/cli/commands/evaluate.py index ab72c840a..7c24f480b 100644 --- a/src/aiq/cli/commands/evaluate.py +++ b/src/aiq/cli/commands/evaluate.py @@ -97,7 +97,7 @@ async def run_and_evaluate(config: EvaluationRunConfig): @eval_command.result_callback(replace=True) def process_aiq_eval( - processors, # pylint: disable=unused-argument + processors, *, config_file: Path, dataset: Path, diff --git a/src/aiq/utils/data_models/schema_validator.py b/src/aiq/utils/data_models/schema_validator.py index 750f64edc..090e7f410 100644 --- a/src/aiq/utils/data_models/schema_validator.py +++ b/src/aiq/utils/data_models/schema_validator.py @@ -31,7 +31,7 @@ def validate_schema(metadata, Schema): # pylint: disable=invalid-name @yaml_exception_handler -def validate_yaml(ctx, param, value): # pylint: disable=unused-argument +def validate_yaml(ctx, param, value): """ Validate that the file is a valid YAML file diff --git 
a/src/aiq/utils/exception_handlers/schemas.py b/src/aiq/utils/exception_handlers/schemas.py index d7e361037..720b54480 100644 --- a/src/aiq/utils/exception_handlers/schemas.py +++ b/src/aiq/utils/exception_handlers/schemas.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -def schema_exception_handler(func, **kwargs): # pylint: disable=unused-argument +def schema_exception_handler(func, **kwargs): """ A decorator that handles `ValidationError` exceptions for schema validation functions. diff --git a/tests/aiq/cli/test_type_registry.py b/tests/aiq/cli/test_type_registry.py index 180679258..abf4d2133 100644 --- a/tests/aiq/cli/test_type_registry.py +++ b/tests/aiq/cli/test_type_registry.py @@ -25,7 +25,7 @@ def test_register_function(registry: TypeRegistry): with pytest.raises(KeyError): registry.get_function(FunctionTestConfig) - def tool_fn(builder: Builder): # pylint: disable=unused-argument + def tool_fn(builder: Builder): pass registry.register_function( From 8e242863445fa392cc332915285162086bf06cd1 Mon Sep 17 00:00:00 2001 From: Hritik Raj Date: Thu, 1 May 2025 11:19:18 +0530 Subject: [PATCH 10/12] Removed pylint unused-argument Signed-off-by: Hritik Raj From 38eb56fd723b7c4d061a3cff1992a9df547c889e Mon Sep 17 00:00:00 2001 From: Hritik Raj Date: Fri, 2 May 2025 07:58:17 +0530 Subject: [PATCH 11/12] removed pylint from toml Signed-off-by: Hritik Raj --- pyproject.toml | 1 - src/aiq/cli/commands/evaluate.py | 2 +- src/aiq/utils/data_models/schema_validator.py | 2 +- src/aiq/utils/exception_handlers/schemas.py | 2 +- tests/aiq/cli/test_type_registry.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b51b5bd39..0ab7ccde1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -619,7 +619,6 @@ disable = [ "too-many-statements", "unnecessary-lambda", # We pass lambdas around a lot, so this is not useful "unnecessary-pass", # Allow empty classes/methods with only a `pass` statement in the body - 
"unused-argument", # Allow unused arguments in functions for rapid prototyping (delete after initial AIQToolkit prototype) "use-symbolic-message-instead", "useless-suppression", "wrong-import-order", # pylint mistakenly thinks that the test utils are third party, and we have isort for this diff --git a/src/aiq/cli/commands/evaluate.py b/src/aiq/cli/commands/evaluate.py index 7c24f480b..ab72c840a 100644 --- a/src/aiq/cli/commands/evaluate.py +++ b/src/aiq/cli/commands/evaluate.py @@ -97,7 +97,7 @@ async def run_and_evaluate(config: EvaluationRunConfig): @eval_command.result_callback(replace=True) def process_aiq_eval( - processors, + processors, # pylint: disable=unused-argument *, config_file: Path, dataset: Path, diff --git a/src/aiq/utils/data_models/schema_validator.py b/src/aiq/utils/data_models/schema_validator.py index 090e7f410..40b2ddff0 100644 --- a/src/aiq/utils/data_models/schema_validator.py +++ b/src/aiq/utils/data_models/schema_validator.py @@ -31,7 +31,7 @@ def validate_schema(metadata, Schema): # pylint: disable=invalid-name @yaml_exception_handler -def validate_yaml(ctx, param, value): +def validate_yaml(ctx, param, value): # pylint: disable=unused-argument """ Validate that the file is a valid YAML file diff --git a/src/aiq/utils/exception_handlers/schemas.py b/src/aiq/utils/exception_handlers/schemas.py index 720b54480..d7e361037 100644 --- a/src/aiq/utils/exception_handlers/schemas.py +++ b/src/aiq/utils/exception_handlers/schemas.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -def schema_exception_handler(func, **kwargs): +def schema_exception_handler(func, **kwargs): # pylint: disable=unused-argument """ A decorator that handles `ValidationError` exceptions for schema validation functions. 
diff --git a/tests/aiq/cli/test_type_registry.py b/tests/aiq/cli/test_type_registry.py index abf4d2133..180679258 100644 --- a/tests/aiq/cli/test_type_registry.py +++ b/tests/aiq/cli/test_type_registry.py @@ -25,7 +25,7 @@ def test_register_function(registry: TypeRegistry): with pytest.raises(KeyError): registry.get_function(FunctionTestConfig) - def tool_fn(builder: Builder): + def tool_fn(builder: Builder): # pylint: disable=unused-argument pass registry.register_function( From b7ebd841fb2d5e4f343d40912f688bc6c123e348 Mon Sep 17 00:00:00 2001 From: Hritik Raj Date: Fri, 2 May 2025 07:59:21 +0530 Subject: [PATCH 12/12] removed space Signed-off-by: Hritik Raj --- src/aiq/utils/data_models/schema_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiq/utils/data_models/schema_validator.py b/src/aiq/utils/data_models/schema_validator.py index 40b2ddff0..750f64edc 100644 --- a/src/aiq/utils/data_models/schema_validator.py +++ b/src/aiq/utils/data_models/schema_validator.py @@ -31,7 +31,7 @@ def validate_schema(metadata, Schema): # pylint: disable=invalid-name @yaml_exception_handler -def validate_yaml(ctx, param, value): # pylint: disable=unused-argument +def validate_yaml(ctx, param, value): # pylint: disable=unused-argument """ Validate that the file is a valid YAML file