Commit 3d66029

fix dataset
1 parent a9dc47f commit 3d66029

File tree

2 files changed: +48 −41 lines


b.py (+40 −39)
@@ -13,11 +13,12 @@
 from deepeval.test_case.llm_test_case import LLMTestCaseParams

 test_case = ConversationalTestCase(
+    chatbot_role="A programmer",
     turns=[
         LLMTestCase(
             input="Message input", actual_output="Message actual output"
         )
-    ]
+    ],
 )
 test_case2 = ConversationalTestCase(
     turns=[
@@ -36,46 +37,46 @@

 from deepeval.metrics import GEval

-correctness_metric = GEval(
-    name="Correctness",
-    criteria="Determine whether the actual output is factually correct based on the expected output.",
-    # NOTE: you can only provide either criteria or evaluation_steps, and not both
-    evaluation_steps=[
-        "Check whether the facts in 'actual output' contradicts any facts in 'expected output'",
-        "You should also heavily penalize omission of detail",
-        "Vague language, or contradicting OPINIONS, are OK",
-    ],
-    evaluation_params=[
-        LLMTestCaseParams.INPUT,
-        LLMTestCaseParams.ACTUAL_OUTPUT,
-    ],
-)
+# correctness_metric = GEval(
+#     name="Correctness",
+#     criteria="Determine whether the actual output is factually correct based on the expected output.",
+#     # NOTE: you can only provide either criteria or evaluation_steps, and not both
+#     evaluation_steps=[
+#         "Check whether the facts in 'actual output' contradicts any facts in 'expected output'",
+#         "You should also heavily penalize omission of detail",
+#         "Vague language, or contradicting OPINIONS, are OK",
+#     ],
+#     evaluation_params=[
+#         LLMTestCaseParams.INPUT,
+#         LLMTestCaseParams.ACTUAL_OUTPUT,
+#     ],
+# )

-evaluate(
-    test_cases=[
-        LLMTestCase(
-            input="Message input number 1!",
-            actual_output="Message actual output number 1...",
-            retrieval_context=["I love dogs"],
-        ),
-        LLMTestCase(
-            input="Message input 2, this is just a test",
-            actual_output="Message actual output 2, this is just a test",
-            retrieval_context=["I love dogs"],
-        ),
-    ],
-    metrics=[
-        # correctness_metric,
-        # AnswerRelevancyMetric(),
-        # BiasMetric(),
-        SummarizationMetric(verbose_mode=True, truths_extraction_limit=3),
-        FaithfulnessMetric(verbose_mode=True, truths_extraction_limit=3),
-    ],
-    # throttle_value=10,
-    # max_concurrent=1,
-)
+# evaluate(
+#     test_cases=[
+#         LLMTestCase(
+#             input="Message input number 1!",
+#             actual_output="Message actual output number 1...",
+#             retrieval_context=["I love dogs"],
+#         ),
+#         LLMTestCase(
+#             input="Message input 2, this is just a test",
+#             actual_output="Message actual output 2, this is just a test",
+#             retrieval_context=["I love dogs"],
+#         ),
+#     ],
+#     metrics=[
+#         # correctness_metric,
+#         # AnswerRelevancyMetric(),
+#         # BiasMetric(),
+#         SummarizationMetric(verbose_mode=True, truths_extraction_limit=3),
+#         FaithfulnessMetric(verbose_mode=True, truths_extraction_limit=3),
+#     ],
+#     # throttle_value=10,
+#     # max_concurrent=1,
+# )

-# confident_evaluate(experiment_name="Convo", test_cases=[test_case])
+confident_evaluate(experiment_name="Convo", test_cases=[test_case])


 # evaluate(
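
Net effect on b.py: the GEval metric and the local evaluate() run are commented out, chatbot_role is set on the first conversational test case, and confident_evaluate becomes the only live call. For reference, a sketch of the code path this commit leaves active, reconstructed from the diff; the import lines are assumed from the visible context and may not match the actual top of the file:

    from deepeval import confident_evaluate
    from deepeval.test_case import ConversationalTestCase, LLMTestCase

    # The only remaining live call: send a single conversational test case,
    # now tagged with a chatbot_role, to the "Convo" experiment.
    test_case = ConversationalTestCase(
        chatbot_role="A programmer",
        turns=[
            LLMTestCase(
                input="Message input", actual_output="Message actual output"
            )
        ],
    )

    confident_evaluate(experiment_name="Convo", test_cases=[test_case])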

deepeval/dataset/dataset.py (+8 −2)
@@ -544,14 +544,20 @@ def add_goldens_from_json_file(
                 )
             )

-    def push(self, alias: str, overwrite: Optional[bool] = None):
+    def push(
+        self,
+        alias: str,
+        overwrite: Optional[bool] = None,
+        auto_convert_test_cases_to_goldens: bool = False,
+    ):
         if len(self.test_cases) == 0 and len(self.goldens) == 0:
             raise ValueError(
                 "Unable to push empty dataset to Confident AI, there must be at least one test case or golden in dataset"
             )
         if is_confident():
             goldens = self.goldens
-            goldens.extend(convert_test_cases_to_goldens(self.test_cases))
+            if auto_convert_test_cases_to_goldens:
+                goldens.extend(convert_test_cases_to_goldens(self.test_cases))
             api_dataset = APIDataset(
                 alias=alias,
                 overwrite=overwrite,
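
The push change makes the test-case-to-golden conversion opt-in: previously every test case in the dataset was converted and uploaded alongside the goldens on every push, which appears to be the "fix dataset" behavior the commit message targets. A minimal sketch of the new call site, assuming the usual EvaluationDataset, Golden, and add_test_case API (the alias "my-dataset" is a placeholder):

    from deepeval.dataset import EvaluationDataset, Golden
    from deepeval.test_case import LLMTestCase

    dataset = EvaluationDataset(goldens=[Golden(input="What is DeepEval?")])
    dataset.add_test_case(
        LLMTestCase(input="Message input", actual_output="Message actual output")
    )

    # New default (False): only the goldens are pushed; test cases stay local.
    dataset.push(alias="my-dataset")

    # Opt back in to the old behavior: test cases are converted to goldens
    # and uploaded together with the existing ones.
    dataset.push(
        alias="my-dataset",
        auto_convert_test_cases_to_goldens=True,
    )

Defaulting the flag to False means push no longer silently duplicates local test cases into the remote dataset; callers who relied on the conversion must now ask for it explicitly.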
