Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
from ddtrace.llmobs._experiment import Dataset
from ddtrace.llmobs._experiment import DatasetRecord
from ddtrace.llmobs._experiment import DatasetRecordInputType
from ddtrace.llmobs._experiment import DatasetRecordRaw
from ddtrace.llmobs._experiment import Experiment
from ddtrace.llmobs._experiment import ExperimentConfigType
from ddtrace.llmobs._experiment import JSONType
Expand Down Expand Up @@ -819,7 +820,7 @@ def create_dataset(
dataset_name: str,
project_name: Optional[str] = None,
description: str = "",
records: Optional[List[DatasetRecord]] = None,
records: Optional[List[DatasetRecordRaw]] = None,
) -> Dataset:
if records is None:
records = []
Expand Down Expand Up @@ -880,11 +881,10 @@ def create_dataset_from_csv(

for row in rows:
records.append(
DatasetRecord(
DatasetRecordRaw(
input_data={col: row[col] for col in input_data_columns},
expected_output={col: row[col] for col in expected_output_columns},
metadata={col: row[col] for col in metadata_columns},
record_id="",
)
)

Expand Down
64 changes: 32 additions & 32 deletions tests/llmobs/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import ddtrace
from ddtrace.llmobs._experiment import Dataset
from ddtrace.llmobs._experiment import DatasetRecord
from ddtrace.llmobs._experiment import DatasetRecordRaw
from ddtrace.llmobs._experiment import _ExperimentRunInfo
from tests.utils import override_global_config

Expand Down Expand Up @@ -78,7 +78,7 @@ def run_info_with_stable_id(iteration: int, run_id: Optional[str] = None) -> _Ex


@pytest.fixture
def test_dataset_records() -> List[DatasetRecord]:
def test_dataset_records() -> List[DatasetRecordRaw]:
return []


Expand Down Expand Up @@ -108,7 +108,7 @@ def test_dataset(llmobs, test_dataset_records, test_dataset_name) -> Generator[D
@pytest.fixture
def test_dataset_one_record(llmobs):
records = [
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand All @@ -124,7 +124,7 @@ def test_dataset_one_record(llmobs):
@pytest.fixture
def test_dataset_one_record_w_metadata(llmobs):
records = [
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
metadata={"difficulty": "easy"},
Expand All @@ -141,7 +141,7 @@ def test_dataset_one_record_w_metadata(llmobs):
@pytest.fixture
def test_dataset_one_record_separate_project(llmobs):
records = [
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Massachusetts?"},
expected_output={"answer": "Boston"},
)
Expand Down Expand Up @@ -528,7 +528,7 @@ def test_dataset_pull_exists_with_record(llmobs, test_dataset_one_record):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand Down Expand Up @@ -576,7 +576,7 @@ def test_dataset_pull_w_versions(llmobs, test_dataset, test_dataset_records):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand Down Expand Up @@ -611,11 +611,11 @@ def test_dataset_pull_from_project(llmobs, test_dataset_one_record_separate_proj
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
),
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of China?"},
expected_output={"answer": "Beijing"},
),
Expand All @@ -628,7 +628,7 @@ def test_dataset_modify_records_multiple_times(llmobs, test_dataset, test_datase

test_dataset.update(
0,
DatasetRecord(input_data={"prompt": "What is the capital of Germany?"}),
DatasetRecordRaw(input_data={"prompt": "What is the capital of Germany?"}),
)

assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Germany?"}
Expand Down Expand Up @@ -706,7 +706,7 @@ def test_dataset_modify_records_multiple_times(llmobs, test_dataset, test_datase
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand All @@ -719,7 +719,7 @@ def test_dataset_modify_single_record(llmobs, test_dataset, test_dataset_records

test_dataset.update(
0,
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Germany?"},
expected_output={"answer": "Berlin"},
),
Expand Down Expand Up @@ -756,7 +756,7 @@ def test_dataset_modify_single_record(llmobs, test_dataset, test_dataset_records
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand Down Expand Up @@ -789,7 +789,7 @@ def test_dataset_estimate_size(llmobs, test_dataset):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand Down Expand Up @@ -833,7 +833,7 @@ def test_dataset_modify_record_on_optional(llmobs, test_dataset, test_dataset_re
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
metadata={"difficulty": "easy"},
Expand Down Expand Up @@ -879,7 +879,7 @@ def test_dataset_modify_record_on_input(llmobs, test_dataset, test_dataset_recor
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand All @@ -888,7 +888,7 @@ def test_dataset_modify_record_on_input(llmobs, test_dataset, test_dataset_recor
)
def test_dataset_append(llmobs, test_dataset):
test_dataset.append(
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Italy?"},
expected_output={"answer": "Rome"},
)
Expand Down Expand Up @@ -926,7 +926,7 @@ def test_dataset_append(llmobs, test_dataset):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
Expand All @@ -936,11 +936,11 @@ def test_dataset_append(llmobs, test_dataset):
def test_dataset_extend(llmobs, test_dataset):
test_dataset.extend(
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Italy?"},
expected_output={"answer": "Rome"},
),
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Sweden?"},
expected_output={"answer": "Stockholm"},
),
Expand Down Expand Up @@ -982,15 +982,15 @@ def test_dataset_extend(llmobs, test_dataset):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
)
]
],
)
def test_dataset_append_no_expected_output(llmobs, test_dataset):
test_dataset.append(DatasetRecord(input_data={"prompt": "What is the capital of Sealand?"}))
test_dataset.append(DatasetRecordRaw(input_data={"prompt": "What is the capital of Sealand?"}))
assert len(test_dataset) == 2
assert test_dataset.latest_version == 1
assert test_dataset.version == 1
Expand Down Expand Up @@ -1026,11 +1026,11 @@ def test_dataset_append_no_expected_output(llmobs, test_dataset):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
),
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Italy?"},
expected_output={"answer": "Rome"},
),
Expand Down Expand Up @@ -1067,8 +1067,8 @@ def test_dataset_delete(llmobs, test_dataset):
"test_dataset_records",
[
[
DatasetRecord(input_data={"prompt": "What is the capital of Nauru?"}),
DatasetRecord(input_data={"prompt": "What is the capital of Sealand?"}),
DatasetRecordRaw(input_data={"prompt": "What is the capital of Nauru?"}),
DatasetRecordRaw(input_data={"prompt": "What is the capital of Sealand?"}),
],
],
)
Expand Down Expand Up @@ -1102,11 +1102,11 @@ def test_dataset_delete_no_expected_output(llmobs, test_dataset):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
),
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Italy?"},
expected_output={"answer": "Rome"},
),
Expand Down Expand Up @@ -1149,11 +1149,11 @@ def test_dataset_delete_after_update(llmobs, test_dataset):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
),
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Italy?"},
expected_output={"answer": "Rome"},
),
Expand Down Expand Up @@ -1390,11 +1390,11 @@ def test_experiment_create(llmobs, test_dataset_one_record):
"test_dataset_records",
[
[
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of France?"},
expected_output={"answer": "Paris"},
),
DatasetRecord(
DatasetRecordRaw(
input_data={"prompt": "What is the capital of Canada?"},
expected_output={"answer": "Ottawa"},
),
Expand Down
Loading