diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 87b2c1a43b67..2eba4c099b63 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -10,6 +10,7 @@
 
 ### Bugs Fixed
 - Non adversarial simulator works with `gpt-4o` models using the `json_schema` response format
+- Fixed an issue where the `evaluate` API would fail with "[WinError 32] The process cannot access the file because it is being used by another process" when the venv folder and the target function file are in the same directory.
 - Fix evaluate API failure when `trace.destination` is set to `none`
 
 ### Other Changes
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/__init__.py
similarity index 59%
rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py
rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/__init__.py
index 5f811e4513a6..7b4b3526734a 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/__init__.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/__init__.py
@@ -1,8 +1,9 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from .batch_run_context import BatchRunContext
+from .eval_run_context import EvalRunContext
 from .code_client import CodeClient
 from .proxy_client import ProxyClient
+from .target_run_context import TargetRunContext
 
-__all__ = ["CodeClient", "ProxyClient", "BatchRunContext"]
+__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/code_client.py
similarity index 100%
rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/code_client.py
rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/code_client.py
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
similarity index 92%
rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py
rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
index 089ab12dd40d..5eea27afbd8d 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/batch_run_context.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
@@ -22,13 +22,13 @@
 from .proxy_client import ProxyClient
 
 
-class BatchRunContext:
-    """Context manager for batch run clients.
+class EvalRunContext:
+    """Context manager for an evaluation batch run.
 
     :param client: The client to run in the context.
     :type client: Union[
-        ~azure.ai.evaluation._evaluate._batch_run_client.code_client.CodeClient,
-        ~azure.ai.evaluation._evaluate._batch_run_client.proxy_client.ProxyClient
+        ~azure.ai.evaluation._evaluate._batch_run.code_client.CodeClient,
+        ~azure.ai.evaluation._evaluate._batch_run.proxy_client.ProxyClient
     ]
     """
 
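Reviewer note: a rough usage sketch of the renamed context manager, mirroring how `eval_batch_run` in `_evaluate.py` drives it later in this diff. The `answer_length` evaluator and `data.jsonl` path are placeholders, not part of this change, and the exact `ProxyClient.run` keyword set is assumed from the surrounding code.

```python
# Sketch only: EvalRunContext wrapping a ProxyClient, as the evaluate pipeline does.
from promptflow.client import PFClient

from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, ProxyClient


def answer_length(*, response: str, **kwargs):
    # Placeholder evaluator used purely for illustration.
    return {"length": len(response)}


client = ProxyClient(PFClient())
with EvalRunContext(client):
    # Column mapping and other run options are omitted here for brevity.
    run = client.run(flow=answer_length, data="data.jsonl")
    # get_details must be called inside the context so the user agent is populated.
    details = client.get_details(run, all_results=True)
```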
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py
similarity index 100%
rename from sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run_client/proxy_client.py
rename to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
new file mode 100644
index 000000000000..62a14aa75aa8
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
@@ -0,0 +1,35 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+import os
+import types
+from typing import Optional, Type
+
+from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP
+
+
+class TargetRunContext:
+    """Context manager for a target batch run.
+
+    :param upload_snapshot: Whether to upload the target snapshot.
+    :type upload_snapshot: bool
+    """
+
+    def __init__(self, upload_snapshot: bool) -> None:
+        self._upload_snapshot = upload_snapshot
+
+    def __enter__(self) -> None:
+        # Addresses the "[WinError 32] The process cannot access the file" error,
+        # caused by conflicts when the venv and the target function are in the same directory.
+        # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
+        if not self._upload_snapshot:
+            os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc_value: Optional[BaseException],
+        exc_tb: Optional[types.TracebackType],
+    ) -> None:
+        if not self._upload_snapshot:
+            os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
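Reviewer note: a minimal sketch of how the new context manager is exercised, reduced from the `_apply_target_to_data` change in the next file of this diff. It assumes a local `PFClient` and a flex-flow style callable target; `my_target` and `data.jsonl` are placeholder names.

```python
# Minimal sketch, assuming a local PFClient and a callable target entry.
from promptflow.client import PFClient

from azure.ai.evaluation._evaluate._batch_run import TargetRunContext


def my_target(*, query: str):
    # Placeholder target function; in practice this calls the application under test.
    return {"response": query.upper()}


pf_client = PFClient()

# upload_snapshot=False (the default path in _apply_target_to_data) sets
# PF_FLOW_ENTRY_IN_TMP for the duration of the run and pops it again on exit.
with TargetRunContext(upload_snapshot=False):
    run = pf_client.run(flow=my_target, data="data.jsonl", stream=True)
```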
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
index 5885ccd49439..d1b629094ed0 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
@@ -26,7 +26,7 @@
 )
 from .._model_configurations import AzureAIProject, EvaluationResult, EvaluatorConfig
 from .._user_agent import USER_AGENT
-from ._batch_run_client import BatchRunContext, CodeClient, ProxyClient
+from ._batch_run import EvalRunContext, CodeClient, ProxyClient, TargetRunContext
 from ._utils import (
     _apply_column_mapping,
     _log_metrics_and_instance_results,
@@ -395,7 +395,7 @@ def _apply_target_to_data(
     pf_client: PFClient,
     initial_data: pd.DataFrame,
     evaluation_name: Optional[str] = None,
-    _run_name: Optional[str] = None,
+    **kwargs,
 ) -> Tuple[pd.DataFrame, Set[str], Run]:
     """
     Apply the target function to the data set and return updated data and generated columns.
@@ -410,22 +410,22 @@ def _apply_target_to_data(
     :type initial_data: pd.DataFrame
     :param evaluation_name: The name of the evaluation.
     :type evaluation_name: Optional[str]
-    :param _run_name: The name of target run. Used for testing only.
-    :type _run_name: Optional[str]
     :return: The tuple, containing data frame and the list of added columns.
     :rtype: Tuple[pandas.DataFrame, List[str]]
     """
-    # We are manually creating the temporary directory for the flow
-    # because the way tempdir remove temporary directories will
-    # hang the debugger, because promptflow will keep flow directory.
-    run: Run = pf_client.run(
-        flow=target,
-        display_name=evaluation_name,
-        data=data,
-        properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
-        stream=True,
-        name=_run_name,
-    )
+    _run_name = kwargs.get("_run_name")
+    upload_target_snapshot = kwargs.get("_upload_target_snapshot", False)
+
+    with TargetRunContext(upload_target_snapshot):
+        run: Run = pf_client.run(
+            flow=target,
+            display_name=evaluation_name,
+            data=data,
+            properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
+            stream=True,
+            name=_run_name,
+        )
+
     target_output: pd.DataFrame = pf_client.runs.get_details(run, all_results=True)
     # Remove input and output prefix
     generated_columns = {
@@ -706,7 +706,7 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     target_generated_columns: Set[str] = set()
     if data is not None and target is not None:
         input_data_df, target_generated_columns, target_run = _apply_target_to_data(
-            target, data, pf_client, input_data_df, evaluation_name, _run_name=kwargs.get("_run_name")
+            target, data, pf_client, input_data_df, evaluation_name, **kwargs
        )
 
         for evaluator_name, mapping in column_mapping.items():
@@ -738,7 +738,7 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
     def eval_batch_run(
         batch_run_client: TClient, *, data=Union[str, os.PathLike, pd.DataFrame]
     ) -> Dict[str, __EvaluatorInfo]:
-        with BatchRunContext(batch_run_client):
+        with EvalRunContext(batch_run_client):
             runs = {
                 evaluator_name: batch_run_client.run(
                     flow=evaluator,
@@ -752,7 +752,7 @@ def eval_batch_run(
                 for evaluator_name, evaluator in evaluators.items()
             }
 
-            # get_details needs to be called within BatchRunContext scope in order to have user agent populated
+            # get_details needs to be called within EvalRunContext scope in order to have the user agent populated
             return {
                 evaluator_name: {
                     "result": batch_run_client.get_details(run, all_results=True),
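Reviewer note: since `_evaluate` now forwards its `**kwargs` straight into `_apply_target_to_data`, the private knobs can be reached from the public entry point. A hedged sketch, assuming `evaluate(...)` passes extra keyword arguments through (as it does for `_run_name` today); the target, evaluator, and data file below are placeholders.

```python
# Sketch only: exercising the private _run_name / _upload_target_snapshot knobs
# through the public evaluate() entry point. Not a documented public surface.
from azure.ai.evaluation import evaluate


def my_target(*, query: str):
    return {"response": query.upper()}


def answer_length(*, response: str, **kwargs):
    return {"length": len(response)}


result = evaluate(
    data="data.jsonl",
    target=my_target,
    evaluators={"answer_length": answer_length},
    _run_name="target-run-for-testing",  # optional, used for testing only
    _upload_target_snapshot=False,  # default: keep the PF_FLOW_ENTRY_IN_TMP workaround active
)
```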
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_batch_run_context.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_batch_run_context.py
index 88113e3dd360..0f28d5738003 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_batch_run_context.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_batch_run_context.py
@@ -6,7 +6,7 @@
 
 from azure.ai.evaluation._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
 from azure.ai.evaluation._user_agent import USER_AGENT
-from azure.ai.evaluation._evaluate._batch_run_client import BatchRunContext, CodeClient, ProxyClient
+from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, CodeClient, ProxyClient
 
 
 @pytest.fixture
@@ -20,7 +20,7 @@ def pf_client_mock():
 
 
 @pytest.mark.unittest
-class TestBatchRunContext:
+class TestEvalRunContext:
     def test_with_codeclient(self, mocker, code_client_mock):
         mock_append_user_agent = mocker.patch(
             "promptflow._utils.user_agent_utils.ClientUserAgentUtil.append_user_agent"
@@ -28,7 +28,7 @@ def test_with_codeclient(self, mocker, code_client_mock):
         mock_inject_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.inject_openai_api")
         mock_recover_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.recover_openai_api")
 
-        with BatchRunContext(code_client_mock):
+        with EvalRunContext(code_client_mock):
             # TODO: Failed to mock inject_openai_api and recover_openai_api for some reason.
             # Need to investigate further.
             # mock_inject_openai_api.assert_called_once()
@@ -46,7 +46,7 @@ def test_with_pfclient(self, mocker, pf_client_mock):
         mock_inject_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.inject_openai_api")
         mock_recover_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.recover_openai_api")
 
-        with BatchRunContext(code_client_mock):
+        with EvalRunContext(code_client_mock):
             mock_append_user_agent.assert_not_called()
             mock_inject_openai_api.assert_not_called()
             pass
@@ -57,11 +57,11 @@ def test_batch_timeout_default(self):
         before_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC)
         assert before_timeout is None
 
-        with BatchRunContext(ProxyClient(PFClient)):
+        with EvalRunContext(ProxyClient(PFClient)):
             during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
             assert during_timeout == PF_BATCH_TIMEOUT_SEC_DEFAULT
 
-        # Default timeout should be reset after exiting BatchRunContext
+        # Default timeout should be reset after exiting EvalRunContext
         after_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC)
         assert after_timeout is None
 
@@ -69,10 +69,10 @@ def test_batch_timeout_custom(self):
         custom_timeout = 1000
         os.environ[PF_BATCH_TIMEOUT_SEC] = str(custom_timeout)
 
-        with BatchRunContext(ProxyClient(PFClient)):
+        with EvalRunContext(ProxyClient(PFClient)):
            during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
            assert during_timeout == custom_timeout
 
-        # Custom timeouts should not be reset after exiting BatchRunContext
+        # Custom timeouts should not be reset after exiting EvalRunContext
         after_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
         assert after_timeout == custom_timeout
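Reviewer note: this diff does not add a unit test for the new `TargetRunContext`. A possible companion test, written in the same style as the timeout tests above, could look like the sketch below; it only relies on behavior visible in `target_run_context.py`.

```python
# Possible follow-up test (not included in this diff) for TargetRunContext.
import os

import pytest
from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP

from azure.ai.evaluation._evaluate._batch_run import TargetRunContext


@pytest.mark.unittest
class TestTargetRunContext:
    def test_flow_entry_in_tmp_set_and_cleared(self):
        assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None

        with TargetRunContext(upload_snapshot=False):
            # The workaround is only active while the target run is in flight.
            assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) == "true"

        # __exit__ pops the variable again.
        assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None

    def test_upload_snapshot_leaves_environment_untouched(self):
        with TargetRunContext(upload_snapshot=True):
            assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None
```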