Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from ._aoai.label_grader import AzureOpenAILabelGrader
from ._aoai.string_check_grader import AzureOpenAIStringCheckGrader
from ._aoai.text_similarity_grader import AzureOpenAITextSimilarityGrader
from ._aoai.score_model_grader import AzureOpenAIScoreModelGrader


_patch_all = []
Expand Down Expand Up @@ -99,6 +100,7 @@
"AzureOpenAILabelGrader",
"AzureOpenAIStringCheckGrader",
"AzureOpenAITextSimilarityGrader",
"AzureOpenAIScoreModelGrader",
]

__all__.extend([p for p in _patch_all if p not in __all__])
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@


from .aoai_grader import AzureOpenAIGrader
from .score_model_grader import AzureOpenAIScoreModelGrader

# Public names exported by this package: the grader classes imported above
# from the sibling grader modules.
__all__ = [
"AzureOpenAIGrader",
"AzureOpenAIScoreModelGrader",
]
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class AzureOpenAIGrader():

"""

id = "aoai://general"
id = "azureai://built-in/evaluators/azure-openai/custom_grader"

def __init__(self, *, model_config : Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], grader_config: Dict[str, Any], **kwargs: Any):
self._model_config = model_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class AzureOpenAILabelGrader(AzureOpenAIGrader):

"""

id = "aoai://label_model"
id = "azureai://built-in/evaluators/azure-openai/label_grader"

def __init__(
self,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Any, Dict, Union, List, Optional

from azure.ai.evaluation._model_configurations import (
AzureOpenAIModelConfiguration,
OpenAIModelConfiguration
)
from openai.types.graders import ScoreModelGrader
from azure.ai.evaluation._common._experimental import experimental

from .aoai_grader import AzureOpenAIGrader


@experimental
class AzureOpenAIScoreModelGrader(AzureOpenAIGrader):
    """
    Wrapper class for OpenAI's score model graders.

    Enables continuous scoring evaluation with custom prompts and flexible
    conversation-style inputs. Supports configurable score ranges and
    pass thresholds for binary classification.

    Supplying a ScoreModelGrader to the `evaluate` method will cause an
    asynchronous request to evaluate the grader via the OpenAI API. The
    results of the evaluation will then be merged into the standard
    evaluation results.

    :param model_config: The model configuration to use for the grader.
    :type model_config: Union[
        ~azure.ai.evaluation.AzureOpenAIModelConfiguration,
        ~azure.ai.evaluation.OpenAIModelConfiguration
    ]
    :param input: The input messages for the grader. List of conversation
        messages with role and content.
    :type input: List[Dict[str, str]]
    :param model: The model to use for the evaluation.
    :type model: str
    :param name: The name of the grader.
    :type name: str
    :param range: The range of the score as ``[min, max]`` with
        ``min < max``. Defaults to [0, 1].
    :type range: Optional[List[float]]
    :param pass_threshold: Score threshold for pass/fail classification.
        Must lie within ``range`` (bounds inclusive). Defaults to the
        midpoint of ``range``. The resolved value is stored on the
        instance as ``pass_threshold``; it is not part of the
        ScoreModelGrader configuration built here.
    :type pass_threshold: Optional[float]
    :param sampling_params: The sampling parameters for the model. Only
        forwarded to the grader when provided.
    :type sampling_params: Optional[Dict[str, Any]]
    :param kwargs: Additional keyword arguments forwarded to the
        AzureOpenAIGrader initializer.
    :type kwargs: Any
    :raises ValueError: If ``range`` does not contain exactly two values
        with ``min < max``, or if ``pass_threshold`` lies outside
        ``range``.
    """

    id = "azureai://built-in/evaluators/azure-openai/scorer_grader"

    def __init__(
        self,
        *,
        model_config: Union[
            AzureOpenAIModelConfiguration, OpenAIModelConfiguration
        ],
        input: List[Dict[str, str]],
        model: str,
        name: str,
        range: Optional[List[float]] = None,
        pass_threshold: Optional[float] = None,
        sampling_params: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ):
        # Resolve the score range first: the threshold checks below rely on
        # `range` always being a validated two-element [min, max] sequence.
        if range is None:
            range = [0.0, 1.0]  # Default range
        elif len(range) != 2 or range[0] >= range[1]:
            raise ValueError("range must be a list of two numbers [min, max] where min < max")

        # Resolve the pass threshold against the (now guaranteed) range.
        if pass_threshold is None:
            pass_threshold = (range[0] + range[1]) / 2  # Default to midpoint
        elif pass_threshold < range[0] or pass_threshold > range[1]:
            raise ValueError(f"pass_threshold {pass_threshold} must be within range {range}")

        # Kept on the instance only; it is not forwarded to ScoreModelGrader.
        self.pass_threshold = pass_threshold

        # `range` is always set at this point, so it is always forwarded;
        # `sampling_params` stays optional and is omitted when not supplied.
        grader_kwargs: Dict[str, Any] = {
            "input": input,
            "model": model,
            "name": name,
            "type": "score_model",
            "range": range,
        }
        if sampling_params is not None:
            grader_kwargs["sampling_params"] = sampling_params

        grader = ScoreModelGrader(**grader_kwargs)

        super().__init__(
            model_config=model_config,
            grader_config=grader,
            **kwargs
        )
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class AzureOpenAIStringCheckGrader(AzureOpenAIGrader):

"""

id = "aoai://string_check"
id = "azureai://built-in/evaluators/azure-openai/string_check_grader"

def __init__(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class AzureOpenAITextSimilarityGrader(AzureOpenAIGrader):

"""

id = "aoai://text_similarity"
id = "azureai://built-in/evaluators/azure-openai/text_similarity_grader"

def __init__(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,14 @@ def _get_grader_class(model_id: str) -> Type[AzureOpenAIGrader]:
AzureOpenAILabelGrader,
AzureOpenAIStringCheckGrader,
AzureOpenAITextSimilarityGrader,
AzureOpenAIScoreModelGrader,
)
id_map = {
AzureOpenAIGrader.id: AzureOpenAIGrader,
AzureOpenAILabelGrader.id: AzureOpenAILabelGrader,
AzureOpenAIStringCheckGrader.id: AzureOpenAIStringCheckGrader,
AzureOpenAITextSimilarityGrader.id: AzureOpenAITextSimilarityGrader,
AzureOpenAIScoreModelGrader.id: AzureOpenAIScoreModelGrader,
}

for key in id_map.keys():
Expand Down
Loading