Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
980e2fa
add eval result converter
YoYoJa Oct 3, 2025
57c73b8
Add result converter
YoYoJa Oct 6, 2025
1730b17
update converter params to optional
YoYoJa Oct 6, 2025
3bf93f7
add eval meta data
YoYoJa Oct 7, 2025
e47639d
merge converter change to include eval_meta_data
YoYoJa Oct 7, 2025
5b198b4
fix type
YoYoJa Oct 8, 2025
5fbbabe
remove useless file
YoYoJa Oct 8, 2025
6ca31a1
get eval meta data as input
YoYoJa Oct 8, 2025
ea93d1a
fix build errors
YoYoJa Oct 8, 2025
e6a9caa
remove useless import
YoYoJa Oct 8, 2025
f24f0e0
resolve comments
YoYoJa Oct 8, 2025
0abddb0
update
YoYoJa Oct 8, 2025
518b4af
update comments
YoYoJa Oct 8, 2025
74dfddc
merge otel
YoYoJa Oct 8, 2025
15d881e
merge result converter
YoYoJa Oct 8, 2025
5c44f70
fix checker failure
YoYoJa Oct 9, 2025
8d2eb5e
Groundedness Evaluator to not add tool result to tool call message (#…
singankit Oct 9, 2025
417277d
Add ledger certificate package (#43278)
catalinaperalta Oct 9, 2025
48f4b19
[Identity] Update test-resources bicep (#43304)
pvaneck Oct 9, 2025
92274fb
[Communication Shared] Adding the mypy fixes (#42925)
v-dharmarajv Oct 9, 2025
2ce023e
add error msg and error code
YoYoJa Oct 9, 2025
654e28f
merge main
YoYoJa Oct 9, 2025
32aad08
Surface evaluator error msg
YoYoJa Oct 10, 2025
d1449f5
surface out error
YoYoJa Oct 10, 2025
47d20e3
update
YoYoJa Oct 10, 2025
5cee7e4
update UT
YoYoJa Oct 10, 2025
9256912
fix usage
YoYoJa Oct 10, 2025
0ff811b
fix usage
YoYoJa Oct 10, 2025
18b0db2
resolve conflict
YoYoJa Oct 10, 2025
36b0761
merge type updat
YoYoJa Oct 10, 2025
59b0aab
make eval_meta_data optional
YoYoJa Oct 12, 2025
d4d768c
remove useless lines
YoYoJa Oct 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,80 @@ class TokenScope(str, enum.Enum):
COGNITIVE_SERVICES_MANAGEMENT = "https://ai.azure.com/.default"
AZURE_ML = "https://ml.azure.com/.default"

class _EvaluatorMetricMapping:
    """Static lookup tables relating evaluators to the metrics they emit.

    ``EVALUATOR_NAME_METRICS_MAPPINGS`` maps an evaluator name to the list of
    metric names it produces, based on assets.json. The 'builtin.' prefix is
    removed from the evaluator name keys.

    ``EVAL_CLASS_NAME_MAP`` maps an evaluator class name to its evaluator
    name, i.e. to a key of the first table.
    """

    # Evaluator name -> metric names produced by that evaluator.
    EVALUATOR_NAME_METRICS_MAPPINGS = {
        "bleu_score": ["bleu"],
        "coherence": ["coherence"],
        "document_retrieval": [
            "ndcg@3",
            "xdcg@3",
            "fidelity",
            "top1_relevance",
            "top3_max_relevance",
            "holes",
            "holes_ratio",
            "total_retrieved_documents",
            "total_ground_truth_documents",
        ],
        "f1_score": ["f1_score"],
        "fluency": ["fluency"],
        "gleu_score": ["gleu"],
        "meteor_score": ["meteor"],
        "relevance": ["relevance"],
        "response_completeness": ["response_completeness"],
        "rouge_score": ["rouge_precision", "rouge_recall", "rouge_f1_score"],
        "groundedness_pro": ["groundedness_pro"],
        "similarity": ["similarity"],
        "intent_resolution": ["intent_resolution"],
        "retrieval": ["retrieval"],
        "task_adherence": ["task_adherence"],
        "tool_call_accuracy": ["tool_call_accuracy"],
        "groundedness": ["groundedness"],
        "code_vulnerability": ["code_vulnerability"],
        "eci": ["eci"],
        "protected_material": ["protected_material"],
        "ungrounded_attributes": ["ungrounded_attributes"],
        "indirect_attack": [
            "xpia",
            "xpia_manipulated_content",
            "xpia_intrusion",
            "xpia_information_gathering",
        ],
        "label_grader": ["label_model"],
        "string_check_grader": ["string_check"],
        # Shares the "similarity" metric name with the "similarity" evaluator.
        "text_similarity_grader": ["similarity"],
        "score_model_grader": ["score_model"],
        "sexual": ["sexual"],
        "self_harm": ["self_harm"],
        "violence": ["violence"],
        "hate_unfairness": ["hate_unfairness"],
    }

    # Evaluator class name -> evaluator name.
    # NOTE(review): "content_safety", "qa" and "task_completion" have no entry
    # in EVALUATOR_NAME_METRICS_MAPPINGS, so a chained lookup must tolerate a
    # missing key for those three - confirm this is intended.
    EVAL_CLASS_NAME_MAP = {
        "BleuScoreEvaluator": "bleu_score",
        "CodeVulnerabilityEvaluator": "code_vulnerability",
        "CoherenceEvaluator": "coherence",
        "ContentSafetyEvaluator": "content_safety",
        "DocumentRetrievalEvaluator": "document_retrieval",
        "ECIEvaluator": "eci",
        "F1ScoreEvaluator": "f1_score",
        "FluencyEvaluator": "fluency",
        "GleuScoreEvaluator": "gleu_score",
        "GroundednessEvaluator": "groundedness",
        "GroundednessProEvaluator": "groundedness_pro",
        "HateUnfairnessEvaluator": "hate_unfairness",
        "IndirectAttackEvaluator": "indirect_attack",
        "IntentResolutionEvaluator": "intent_resolution",
        "MeteorScoreEvaluator": "meteor_score",
        "ProtectedMaterialEvaluator": "protected_material",
        "QAEvaluator": "qa",
        "RelevanceEvaluator": "relevance",
        "ResponseCompletenessEvaluator": "response_completeness",
        "RetrievalEvaluator": "retrieval",
        "RougeScoreEvaluator": "rouge_score",
        "SelfHarmEvaluator": "self_harm",
        "SexualEvaluator": "sexual",
        "SimilarityEvaluator": "similarity",
        "TaskAdherenceEvaluator": "task_adherence",
        "TaskCompletionEvaluator": "task_completion",
        "ToolCallAccuracyEvaluator": "tool_call_accuracy",
        "UngroundedAttributesEvaluator": "ungrounded_attributes",
        "ViolenceEvaluator": "violence",
    }

# Default file name for the evaluation results JSON file.
# NOTE(review): where this name is consumed is not visible in this chunk.
DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"

Expand Down
Loading