1 change: 1 addition & 0 deletions sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -10,6 +10,7 @@

### Bugs Fixed
- Non-adversarial simulator works with `gpt-4o` models using the `json_schema` response format
- Fixed an issue where the `evaluate` API would fail with "[WinError 32] The process cannot access the file because it is being used by another process" when venv folder and target function file are in the same directory.
- Fix evaluate API failure when `trace.destination` is set to `none`

### Other Changes
@@ -1,8 +1,9 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from .batch_run_context import BatchRunContext
from .eval_run_context import EvalRunContext
from .code_client import CodeClient
from .proxy_client import ProxyClient
from .target_run_context import TargetRunContext

__all__ = ["CodeClient", "ProxyClient", "BatchRunContext"]
__all__ = ["CodeClient", "ProxyClient", "EvalRunContext", "TargetRunContext"]
@@ -22,13 +22,13 @@
from .proxy_client import ProxyClient


class BatchRunContext:
"""Context manager for batch run clients.
class EvalRunContext:
"""Context manager for eval batch run.

:param client: The client to run in the context.
:type client: Union[
~azure.ai.evaluation._evaluate._batch_run_client.code_client.CodeClient,
~azure.ai.evaluation._evaluate._batch_run_client.proxy_client.ProxyClient
~azure.ai.evaluation._evaluate._batch_run.code_client.CodeClient,
~azure.ai.evaluation._evaluate._batch_run.proxy_client.ProxyClient
]
"""

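For orientation, a minimal usage sketch of the renamed context manager follows; it mirrors how the unit tests further down exercise it and is not part of this PR's diff. The `PFClient` import path and the `ProxyClient` wiring are assumptions for illustration.

from promptflow.client import PFClient

from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, ProxyClient

# EvalRunContext applies the evaluation user agent, the OpenAI injector, and the
# default batch timeout for the duration of the run, and reverts what it set on exit.
pf_client = PFClient()
with EvalRunContext(ProxyClient(pf_client)):
    ...  # submit evaluator runs and call get_details() inside this scope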
@@ -0,0 +1,35 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import os
import types
from typing import Optional, Type

from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP


class TargetRunContext:
"""Context manager for target batch run.

:param upload_snapshot: Whether to upload target snapshot.
:type upload_snapshot: bool
"""

def __init__(self, upload_snapshot: bool) -> None:
self._upload_snapshot = upload_snapshot

def __enter__(self) -> None:
# Address "[WinError 32] The process cannot access the file" error,
# caused by conflicts when the venv and target function are in the same directory.
# Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
if not self._upload_snapshot:
os.environ[PF_FLOW_ENTRY_IN_TMP] = "true"

def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_value: Optional[BaseException],
exc_tb: Optional[types.TracebackType],
) -> None:
if not self._upload_snapshot:
os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
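As a quick illustration of the new context manager's effect (not part of the PR; it assumes `PF_FLOW_ENTRY_IN_TMP` is not already set in the surrounding environment):

import os

from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP

from azure.ai.evaluation._evaluate._batch_run import TargetRunContext

with TargetRunContext(upload_snapshot=False):
    # While the context is active, only the flex entry file (flow.flex.yaml) is
    # uploaded, which avoids the WinError 32 lock on the venv directory.
    print(os.environ.get(PF_FLOW_ENTRY_IN_TMP))  # "true"

# The override is popped again on exit (assuming it was unset beforehand).
print(os.environ.get(PF_FLOW_ENTRY_IN_TMP))  # None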
@@ -26,7 +26,7 @@
)
from .._model_configurations import AzureAIProject, EvaluationResult, EvaluatorConfig
from .._user_agent import USER_AGENT
from ._batch_run_client import BatchRunContext, CodeClient, ProxyClient
from ._batch_run import EvalRunContext, CodeClient, ProxyClient, TargetRunContext
from ._utils import (
_apply_column_mapping,
_log_metrics_and_instance_results,
@@ -395,7 +395,7 @@ def _apply_target_to_data(
pf_client: PFClient,
initial_data: pd.DataFrame,
evaluation_name: Optional[str] = None,
_run_name: Optional[str] = None,
**kwargs,
) -> Tuple[pd.DataFrame, Set[str], Run]:
"""
Apply the target function to the data set and return updated data and generated columns.
@@ -410,22 +410,22 @@ def _apply_target_to_data(
:type initial_data: pd.DataFrame
:param evaluation_name: The name of the evaluation.
:type evaluation_name: Optional[str]
:param _run_name: The name of target run. Used for testing only.
:type _run_name: Optional[str]
:return: The tuple containing the updated data frame, the set of generated columns, and the target run.
:rtype: Tuple[pandas.DataFrame, Set[str], Run]
"""
# We are manually creating the temporary directory for the flow because the way
# tempdir removes temporary directories would hang the debugger, since promptflow
# keeps the flow directory.
run: Run = pf_client.run(
flow=target,
display_name=evaluation_name,
data=data,
properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
stream=True,
name=_run_name,
)
_run_name = kwargs.get("_run_name")
upload_target_snapshot = kwargs.get("_upload_target_snapshot", False)

with TargetRunContext(upload_target_snapshot):
    run: Run = pf_client.run(
        flow=target,
        display_name=evaluation_name,
        data=data,
        properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
        stream=True,
        name=_run_name,
    )

target_output: pd.DataFrame = pf_client.runs.get_details(run, all_results=True)
# Remove input and output prefix
generated_columns = {
@@ -706,7 +706,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
target_generated_columns: Set[str] = set()
if data is not None and target is not None:
input_data_df, target_generated_columns, target_run = _apply_target_to_data(
target, data, pf_client, input_data_df, evaluation_name, _run_name=kwargs.get("_run_name")
target, data, pf_client, input_data_df, evaluation_name, **kwargs
)

for evaluator_name, mapping in column_mapping.items():
@@ -738,7 +738,7 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements
def eval_batch_run(
batch_run_client: TClient, *, data: Union[str, os.PathLike, pd.DataFrame]
) -> Dict[str, __EvaluatorInfo]:
with BatchRunContext(batch_run_client):
with EvalRunContext(batch_run_client):
runs = {
evaluator_name: batch_run_client.run(
flow=evaluator,
@@ -752,7 +752,7 @@ def eval_batch_run(
for evaluator_name, evaluator in evaluators.items()
}

# get_details needs to be called within BatchRunContext scope in order to have user agent populated
# get_details needs to be called within EvalRunContext scope in order to have user agent populated
return {
evaluator_name: {
"result": batch_run_client.get_details(run, all_results=True),
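To make the `**kwargs` plumbing concrete, here is a hedged sketch of a call into the private helper shown above; the data path, run name, and evaluation name are placeholder values, and `_upload_target_snapshot` falls back to `False` when omitted:

# Illustrative call site only; `target`, `pf_client`, and `input_data_df` are
# assumed to be in scope as in _evaluate above.
input_data_df, target_generated_columns, target_run = _apply_target_to_data(
    target,
    "eval_input.jsonl",              # placeholder input data path
    pf_client,
    input_data_df,
    evaluation_name="my-evaluation",
    _run_name="my-target-run",       # test-only override, now read from **kwargs
    _upload_target_snapshot=False,   # default; keeps PF_FLOW_ENTRY_IN_TMP set during the target run
)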
@@ -6,7 +6,7 @@

from azure.ai.evaluation._constants import PF_BATCH_TIMEOUT_SEC, PF_BATCH_TIMEOUT_SEC_DEFAULT
from azure.ai.evaluation._user_agent import USER_AGENT
from azure.ai.evaluation._evaluate._batch_run_client import BatchRunContext, CodeClient, ProxyClient
from azure.ai.evaluation._evaluate._batch_run import EvalRunContext, CodeClient, ProxyClient


@pytest.fixture
@@ -20,15 +20,15 @@ def pf_client_mock():


@pytest.mark.unittest
class TestBatchRunContext:
class TestEvalRunContext:
def test_with_codeclient(self, mocker, code_client_mock):
mock_append_user_agent = mocker.patch(
"promptflow._utils.user_agent_utils.ClientUserAgentUtil.append_user_agent"
)
mock_inject_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.inject_openai_api")
mock_recover_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.recover_openai_api")

with BatchRunContext(code_client_mock):
with EvalRunContext(code_client_mock):
# TODO: Failed to mock inject_openai_api and recover_openai_api for some reason.
# Need to investigate further.
# mock_inject_openai_api.assert_called_once()
@@ -46,7 +46,7 @@ def test_with_pfclient(self, mocker, pf_client_mock):
mock_inject_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.inject_openai_api")
mock_recover_openai_api = mocker.patch("promptflow.tracing._integrations._openai_injector.recover_openai_api")

with BatchRunContext(code_client_mock):
with EvalRunContext(code_client_mock):
mock_append_user_agent.assert_not_called()
mock_inject_openai_api.assert_not_called()
pass
@@ -57,22 +57,22 @@ def test_batch_timeout_default(self):
before_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC)
assert before_timeout is None

with BatchRunContext(ProxyClient(PFClient)):
with EvalRunContext(ProxyClient(PFClient)):
during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
assert during_timeout == PF_BATCH_TIMEOUT_SEC_DEFAULT

# Default timeout should be reset after exiting BatchRunContext
# Default timeout should be reset after exiting EvalRunContext
after_timeout = os.environ.get(PF_BATCH_TIMEOUT_SEC)
assert after_timeout is None

def test_batch_timeout_custom(self):
custom_timeout = 1000
os.environ[PF_BATCH_TIMEOUT_SEC] = str(custom_timeout)

with BatchRunContext(ProxyClient(PFClient)):
with EvalRunContext(ProxyClient(PFClient)):
during_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
assert during_timeout == custom_timeout

# Custom timeouts should not be reset after exiting BatchRunContext
# Custom timeouts should not be reset after exiting EvalRunContext
after_timeout = int(os.environ.get(PF_BATCH_TIMEOUT_SEC))
assert after_timeout == custom_timeout
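A companion test for `TargetRunContext` could follow the same pattern as the timeout tests above. The sketch below is illustrative (the test class and method names are hypothetical) and assumes `PF_FLOW_ENTRY_IN_TMP` is unset in the surrounding environment:

import os

import pytest
from promptflow._sdk._constants import PF_FLOW_ENTRY_IN_TMP

from azure.ai.evaluation._evaluate._batch_run import TargetRunContext


@pytest.mark.unittest
class TestTargetRunContext:
    def test_sets_and_resets_flow_entry_in_tmp(self):
        assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None

        with TargetRunContext(upload_snapshot=False):
            # Entry-in-tmp mode is active only while the target run is in flight.
            assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) == "true"

        assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None

    def test_noop_when_uploading_snapshot(self):
        with TargetRunContext(upload_snapshot=True):
            # Nothing is set when the full target snapshot is uploaded.
            assert os.environ.get(PF_FLOW_ENTRY_IN_TMP) is None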