Commit b691308

Fixes: Factual Accuracy and Response Completeness Tests and Add json5 to dependencies (#661)

* Fix response consistency test

* Fix factual accuracy explanation

* Add json5 dependency
Dominastorm authored Mar 20, 2024
1 parent 8d525e7 commit b691308
Showing 3 changed files with 9 additions and 16 deletions.
pyproject.toml (5 changes: 3 additions & 2 deletions)
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "uptrain"
-version = "0.6.8"
+version = "0.6.9"
 description = "UpTrain - tool to evaluate LLM applications on aspects like factual accuracy, response quality, retrieval quality, tonality, etc."
 readme = "README.md"
 maintainers = [{ name = "UpTrain AI Team", email = "[email protected]" }]
@@ -33,7 +33,8 @@ dependencies = [
     "openai>=1.6.1",
     "fsspec",
     "litellm",
-    "pyyaml"
+    "pyyaml",
+    "json5"
 ]
 
 [project.urls]
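A likely reason for the new json5 dependency is that LLM responses often come back as relaxed JSON (unquoted keys, trailing commas) that the standard json module rejects, while json5 parses that superset. That motivation is an inference from the factual-accuracy change below, not something the commit states. A minimal sketch using the json5 PyPI package:

import json
import json5

# Relaxed JSON of the kind an LLM might emit: unquoted keys, trailing commas.
payload = '{Result: [{Judgement: "yes"},],}'

try:
    json.loads(payload)  # the strict stdlib parser rejects this
except json.JSONDecodeError as err:
    print(f"json rejected it: {err}")

parsed = json5.loads(payload)  # json5 accepts the relaxed syntax
print(parsed["Result"][0]["Judgement"])  # -> yes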
tests/test_builtins.py (15 changes: 4 additions & 11 deletions)
@@ -164,25 +164,18 @@ def test_check_response_consistency():
     assert isinstance(output, pl.DataFrame)
     assert (
         "score_response_consistency" in output.columns
-        and "argument_response_consistency" in output.columns
+        and "explanation_response_consistency" in output.columns
     )
     assert (
         output["score_response_consistency"].dtype == pl.Float64
         and len(output["score_response_consistency"])
         - output["score_response_consistency"].null_count() > 0
     )
     assert (
-        output["argument_response_consistency"].dtype == pl.Utf8
-        and len(output["argument_response_consistency"])
-        - output["argument_response_consistency"].null_count() > 0
+        output["explanation_response_consistency"].dtype == pl.Utf8
+        and len(output["explanation_response_consistency"])
+        - output["explanation_response_consistency"].null_count() > 0
     )
-    if settings.eval_type == "cot":
-        assert "reasoning_response_consistency" in output.columns
-        assert (
-            output["reasoning_response_consistency"].dtype == pl.Utf8
-            and len(output["reasoning_response_consistency"])
-            - output["reasoning_response_consistency"].null_count() > 0
-        )
 
 
 response_matching_dataset = pl.DataFrame(
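The rewritten assertions all follow one pattern: the column exists, has the expected dtype, and holds at least one non-null value (length minus null count). A self-contained illustration of that check, with a made-up frame standing in for the evaluation output:

import polars as pl

# Hypothetical evaluation output; one row failed and produced a null.
output = pl.DataFrame(
    {"explanation_response_consistency": ["responses agree on the answer", None]}
)

col = output["explanation_response_consistency"]
# len(col) - col.null_count() counts the non-null entries.
assert col.dtype == pl.Utf8
assert len(col) - col.null_count() > 0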
uptrain/operators/language/factual_accuracy.py (5 changes: 2 additions & 3 deletions)
@@ -230,11 +230,10 @@ def evaluate_local(self, data):
                 "explanation_factual_accuracy": None,
             }
             try:
-                result = parse_json(res.response.choices[0].message.content)["Result"]
-                judgements = [x["Judgement"] for x in result]
+                judgements = [x["Judgement"] for x in parse_json(res.response.choices[0].message.content)["Result"]]
                 score = np.mean([self.score_mapping[x.lower()] for x in judgements])
                 output["score_factual_accuracy"] = float(score)
-                output["explanation_factual_accuracy"] = res.response.choices[0].message.content
+                output["explanation_factual_accuracy"] = res.response.choices[0].message.content
             except Exception:
                 logger.error(
                     f"Error when processing payload at index {idx}: {res.error}"
                 )
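The change keeps the raw model response as the explanation (the parsed "Result" list was stored before) and folds the parse into the judgement comprehension. The scoring step averages per-claim judgements through score_mapping; a minimal sketch of that logic, with a hypothetical mapping and response, since the real parse_json helper and prompt format live elsewhere in the UpTrain codebase:

import numpy as np
import json5

# Hypothetical judgement-to-score table; the operator's actual mapping may differ.
score_mapping = {"yes": 1.0, "unclear": 0.5, "no": 0.0}

# Example model output: one judgement per extracted fact.
content = '{"Result": [{"Judgement": "yes"}, {"Judgement": "no"}, {"Judgement": "yes"}]}'

judgements = [x["Judgement"] for x in json5.loads(content)["Result"]]
score = float(np.mean([score_mapping[x.lower()] for x in judgements]))

print(score)            # 0.666...
explanation = content   # the raw response is kept as the explanation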
