Skip to content

Commit

Permalink
Publish comet metrics (#621)
Browse files Browse the repository at this point in the history
* Configure evaluation tasks

* Extract W&B code into a module

* Do not check Taskcluster when publication is disabled

* Publish evaluation metrics to W&B

* Fix running eval tracking on CI

* Use args.wandb_run_name instead of default teacher

* Remove duplicated arguments

* Retrieve dataset from Taskcluster directly

* Add missing calls to publisher and logging

* Allow publishing metrics as a table on existing runs (i.e. previous trainings)

* Update regex to parse labels ending with '-1'

* Generic support for train/eval different naming

* Update tests

* Support disabled publication

* Support COMET metric in online publication

* Enable publication

* Run linter

* Revert "Enable publication"

This reverts commit a1ef893.

---------

Co-authored-by: Bastien Abadie <[email protected]>
Co-authored-by: Bastien Abadie <[email protected]>
Co-authored-by: Evgeny Pavlov <[email protected]>
Co-authored-by: Evgeny Pavlov <[email protected]>
  • Loading branch information
5 people authored May 22, 2024
1 parent 8a1d8ef commit 419ff93
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 5 deletions.
4 changes: 3 additions & 1 deletion pipeline/eval/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,9 @@ def main(args_list: Optional[list[str]] = None) -> None:
# Allow publishing metrics as a table on existing runs (i.e. previous trainings)
wandb.open(resume=True)
logger.info(f"Publishing metrics to Weight & Biases ({wandb.extra_kwargs})")
metric = metric_from_tc_context(chrf=chrf_details["score"], bleu=bleu_details["score"])
metric = metric_from_tc_context(
chrf=chrf_details["score"], bleu=bleu_details["score"], comet=comet_score
)
wandb.handle_metrics(metrics=[metric])
wandb.close()

Expand Down
10 changes: 8 additions & 2 deletions tracking/translations_parser/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class Metric:
# Scores
chrf: float
bleu_detok: float
comet: float | None = None # optional

@classmethod
def from_file(
Expand All @@ -79,10 +80,14 @@ def from_file(
values.append(float(line))
except ValueError:
continue
assert len(values) == 2, "file must contain exactly 2 float values"
assert len(values) in (2, 3), "file must contain 2 or 3 lines with a float value"
except Exception as e:
raise ValueError(f"Metrics file could not be parsed: {e}")
bleu_detok, chrf = values
if len(values) == 2:
bleu_detok, chrf = values
comet = None
if len(values) == 3:
bleu_detok, chrf, comet = values
if importer is None:
_, importer, dataset, augmentation = parse_task_label(metrics_file.stem)
return cls(
Expand All @@ -91,6 +96,7 @@ def from_file(
augmentation=augmentation,
chrf=chrf,
bleu_detok=bleu_detok,
comet=comet,
)

@classmethod
Expand Down
6 changes: 5 additions & 1 deletion tracking/translations_parser/publishers.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,11 @@ def handle_metrics(self, metrics: Sequence[Metric]) -> None:
title: wandb.plot.bar(
wandb.Table(
columns=["Metric", "Value"],
data=[[key, getattr(metric, key)] for key in ("bleu_detok", "chrf")],
data=[
[key, getattr(metric, key)]
for key in ("bleu_detok", "chrf", "comet")
if getattr(metric, key) is not None
],
),
"Metric",
"Value",
Expand Down
3 changes: 2 additions & 1 deletion tracking/translations_parser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def build_task_name(task: dict):
return prefix, label.model


def metric_from_tc_context(chrf: float, bleu: float):
def metric_from_tc_context(chrf: float, bleu: float, comet: float):
"""
Find the various names needed to build a metric directly from a Taskcluster task
"""
Expand All @@ -185,4 +185,5 @@ def metric_from_tc_context(chrf: float, bleu: float):
augmentation=parsed.augmentation,
chrf=chrf,
bleu_detok=bleu,
comet=comet,
)

0 comments on commit 419ff93

Please sign in to comment.