@@ -1052,6 +1052,10 @@ def _evaluate( # pylint: disable=too-many-locals,too-many-statements

    result_df_dict = results_df.to_dict("records")
    result: EvaluationResult = {"rows": result_df_dict, "metrics": metrics, "studio_url": studio_url} # type: ignore
    if eval_run_info_list:
        result["oai_eval_run_ids"] = [
            {"eval_group_id": info["eval_group_id"], "eval_run_id": info["eval_run_id"]} for info in eval_run_info_list
        ]
    # _add_aoai_structured_results_to_results(result, LOGGER, kwargs.get("eval_meta_data"))

    eval_id: Optional[str] = kwargs.get("_eval_id")
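With this change, callers that submit Azure OpenAI graders can read the server-side eval group and run IDs straight off the returned result. A minimal sketch of consuming the new field (the grader, model_config, and data path are placeholders, not part of this PR):

    from azure.ai.evaluation import evaluate

    # Assumed setup: `grader` is an AzureOpenAIGrader-based evaluator and
    # eval_data.jsonl exists; both are illustrative stand-ins.
    result = evaluate(data="eval_data.jsonl", evaluators={"labelgrader": grader})

    # oai_eval_run_ids is NotRequired, so guard for runs that had no AOAI graders.
    for ids in result.get("oai_eval_run_ids", []):
        print(f"eval group: {ids['eval_group_id']}, run: {ids['eval_run_id']}")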
@@ -139,6 +139,7 @@ class Conversation(TypedDict):

class EvaluationResult(TypedDict):
    metrics: Dict
    oai_eval_run_ids: NotRequired[List[Dict[str, str]]]
    studio_url: NotRequired[str]
    rows: List[Dict]
    _evaluation_results_list: List[Dict]
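For orientation, a result from a run with one Azure OpenAI grader would now carry the extra key alongside the existing ones; the metric names and IDs below are placeholders, not values produced by this PR:

    example_result = {
        "metrics": {"labelgrader.pass_rate": 1.0},
        "rows": [{"outputs.labelgrader.score": 1}],
        "oai_eval_run_ids": [{"eval_group_id": "grp1", "eval_run_id": "run1"}],
    }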
@@ -52,7 +52,7 @@ def get_strategy_name(attack_strategy: Union[AttackStrategy, List[AttackStrategy


def get_flattened_attack_strategies(
    attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]]
    attack_strategies: List[Union[AttackStrategy, List[AttackStrategy]]],
) -> List[Union[AttackStrategy, List[AttackStrategy]]]:
    """Flatten complex attack strategies into individual strategies.

@@ -426,7 +426,7 @@ def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]:
        try:
            if isinstance(response, str):
                response = response.replace("\u2019", "'").replace("\u2018", "'")
                response = response.replace("\u201C", '"').replace("\u201D", '"')
                response = response.replace("\u201c", '"').replace("\u201d", '"')

                # Replace None with null
                response = response.replace("None", "null")
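The quote-normalization edit only lowercases the hex digits in the escape sequences; "\u201C" and "\u201c" denote the same code point, so behavior is unchanged. A standalone sketch of the same idea, normalizing smart quotes before JSON parsing (an illustration, not the SDK's exact parsing path):

    import json

    def normalize_smart_quotes(text: str) -> str:
        # Curly single/double quotes in LLM output break naive JSON parsing.
        text = text.replace("\u2019", "'").replace("\u2018", "'")
        text = text.replace("\u201c", '"').replace("\u201d", '"')
        return text

    assert "\u201C" == "\u201c"  # escape case is irrelevant; same character
    print(json.loads(normalize_smart_quotes("{\u201clabel\u201d: \u201cpositive\u201d}")))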
@@ -24,7 +24,7 @@
- AZURE_AI_PROJECT_ENDPOINT
2. Hub-based project (legacy):
- AZURE_SUBSCRIPTION_ID
- AZURE_RESOURCE_GROUP_NAME
- AZURE_RESOURCE_GROUP_NAME
- AZURE_PROJECT_NAME
"""

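The two configuration shapes listed above can be assembled from environment variables before calling evaluate; a minimal sketch, assuming the SDK accepts either a project endpoint string or the legacy dict for azure_ai_project and that the endpoint form is preferred when both are set (the precedence and variable handling here are illustrative, not taken from the sample):

    import os

    endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT")
    if endpoint:
        # Foundry (endpoint-based) project.
        azure_ai_project = endpoint
    else:
        # Hub-based (legacy) project identified by its ARM coordinates.
        azure_ai_project = {
            "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"],
            "resource_group_name": os.environ["AZURE_RESOURCE_GROUP_NAME"],
            "project_name": os.environ["AZURE_PROJECT_NAME"],
        }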
@@ -9,7 +9,7 @@
"""
DESCRIPTION:
These samples demonstrate usage of _SafetyEvaluation class with various _SafetyEvaluator instances.

USAGE:
python evaluation_samples_safety_evaluation.py

@@ -10,7 +10,7 @@
"""
DESCRIPTION:
These samples demonstrate usage of various classes and methods used to perform simulation in the azure-ai-evaluation library.

USAGE:
python evaluation_samples_simulate.py

@@ -10,7 +10,7 @@
"""
DESCRIPTION:
These samples demonstrate usage of various classes and methods used to perform evaluation with thresholds in the azure-ai-evaluation library.

USAGE:
python evaluation_samples_threshold.py

@@ -8,7 +8,7 @@
DESCRIPTION:
These samples demonstrate usage of various classes and methods used in Red Team
functionality within the azure-ai-evaluation library.

USAGE:
python red_team_samples.py

@@ -24,7 +24,7 @@
- AZURE_AI_PROJECT_ENDPOINT
2. Hub-based project (legacy):
- AZURE_SUBSCRIPTION_ID
- AZURE_RESOURCE_GROUP_NAME
- AZURE_RESOURCE_GROUP_NAME
- AZURE_PROJECT_NAME
"""

@@ -24,7 +24,7 @@
- AZURE_AI_PROJECT_ENDPOINT
2. Hub-based project (legacy):
- AZURE_SUBSCRIPTION_ID
- AZURE_RESOURCE_GROUP_NAME
- AZURE_RESOURCE_GROUP_NAME
- AZURE_PROJECT_NAME
"""

@@ -24,7 +24,7 @@
- AZURE_AI_PROJECT_ENDPOINT
2. Hub-based project (legacy):
- AZURE_SUBSCRIPTION_ID
- AZURE_RESOURCE_GROUP_NAME
- AZURE_RESOURCE_GROUP_NAME
- AZURE_PROJECT_NAME
"""

@@ -1345,6 +1345,62 @@ def run_test():
        assert len(empty_converted["_evaluation_results_list"]) == 0
        assert empty_converted["_evaluation_summary"]["result_counts"]["total"] == 0

    @patch(
        "azure.ai.evaluation._evaluate._evaluate._map_names_to_builtins",
        return_value={},
    )
    @patch("azure.ai.evaluation._evaluate._evaluate._get_evaluation_run_results")
    @patch("azure.ai.evaluation._evaluate._evaluate._begin_aoai_evaluation")
    @patch("azure.ai.evaluation._evaluate._evaluate._preprocess_data")
    def test_evaluate_returns_oai_eval_run_ids(
        self,
        mock_preprocess,
        mock_begin,
        mock_get_results,
        _,
        mock_model_config,
    ):
        df = pd.DataFrame([{"query": "hi"}])
        grader = AzureOpenAILabelGrader(
            model_config=mock_model_config,
            input=[{"content": "{{item.query}}", "role": "user"}],
            labels=["positive", "negative", "neutral"],
            passing_labels=["neutral"],
            model="gpt-4o-2024-11-20",
            name="labelgrader",
        )
        mock_preprocess.return_value = {
            "column_mapping": {},
            "evaluators": {},
            "graders": {"g": grader},
            "input_data_df": df,
            "target_run": None,
            "batch_run_client": None,
            "batch_run_data": None,
        }
        mock_begin.return_value = [
            {
                "client": None,
                "eval_group_id": "grp1",
                "eval_run_id": "run1",
                "grader_name_map": {},
                "expected_rows": len(df),
            }
        ]
        mock_get_results.return_value = (
            pd.DataFrame([{"outputs.g.score": 1}]),
            {"g.pass_rate": 1.0},
        )

        result = evaluate(
            evaluators={"g": grader},
            data="dummy_path",
            azure_ai_project=None,
        )

        assert "oai_eval_run_ids" in result
        assert result["oai_eval_run_ids"] == [{"eval_group_id": "grp1", "eval_run_id": "run1"}]

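One subtlety worth noting in the new test: stacked @patch decorators inject their mocks bottom-up, which is why the _preprocess_data mock arrives first and the _map_names_to_builtins patch lands in the throwaway _ parameter, just ahead of the mock_model_config fixture. A tiny standalone illustration of that ordering rule (the patched targets are arbitrary stdlib functions, unrelated to this PR):

    import os
    from unittest.mock import patch

    @patch("os.getcwd", return_value="/fake/cwd")  # outermost decorator -> injected last
    @patch("os.getpid", return_value=1234)         # innermost decorator -> injected first
    def show_order(mock_getpid, mock_getcwd):
        # The decorator closest to the function supplies the first mock argument.
        print(os.getpid(), os.getcwd())   # -> 1234 /fake/cwd
        print(mock_getpid.return_value)   # -> 1234, so the first arg is the getpid mock

    show_order()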

@pytest.mark.unittest
class TestTagsInLoggingFunctions: