Skip to content

Commit

Permalink
Training continuation (#226)
Browse files (browse the repository at this point in the history)
* Add taskcluster training script

* Update parameter schema

* Update task kinds and dependencies

* Update train action schema

* Add `training_continuation` transform

* Add parameter sanity check to train.py
  • Loading branch information
gabrielBusta authored Dec 7, 2023
1 parent a773563 commit 2bfe3a4
Show file tree
Hide file tree
Showing 14 changed files with 251 additions and 50 deletions.
3 changes: 0 additions & 3 deletions taskcluster/ci/evaluate-teacher-ensemble/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ transforms:
kind-dependencies:
- dataset
- train-teacher
- train-vocab
- alignments
- toolchain

Expand Down Expand Up @@ -110,13 +109,11 @@ tasks:
unique-kinds: false
kinds:
- train-teacher
- train-vocab
fetches:
train-teacher:
- artifact: final.model.npz.best-{best_model}.npz
dest: model{this_chunk}
- artifact: final.model.npz.best-chrf.npz.decoder.yml
train-vocab:
- artifact: vocab.spm
extract: false

Expand Down
9 changes: 0 additions & 9 deletions taskcluster/ci/evaluate/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ kind-dependencies:
- dataset
- train-backwards
- train-teacher
- train-vocab
- train-student
- finetune-student
- alignments
Expand Down Expand Up @@ -112,7 +111,6 @@ tasks:
dependencies:
dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale}
train-backwards: train-backwards-{src_locale}-{trg_locale}
train-vocab: train-vocab-{src_locale}-{trg_locale}
fetches:
dataset:
- artifact: "{dataset_sanitized}.{src_locale}.zst"
Expand All @@ -124,7 +122,6 @@ tasks:
extract: false
- artifact: final.model.npz.best-{best_model}.npz.decoder.yml
extract: false
train-vocab:
- artifact: vocab.spm
extract: false
toolchain:
Expand Down Expand Up @@ -162,7 +159,6 @@ tasks:
dependencies:
dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale}
train-teacher: train-teacher-{src_locale}-{trg_locale}-{this_chunk}/{total_chunks}
train-vocab: train-vocab-{src_locale}-{trg_locale}
fetches:
dataset:
- artifact: "{dataset_sanitized}.{src_locale}.zst"
Expand All @@ -174,7 +170,6 @@ tasks:
extract: false
- artifact: final.model.npz.best-{best_model}.npz.decoder.yml
extract: false
train-vocab:
- artifact: vocab.spm
extract: false
toolchain:
Expand All @@ -201,7 +196,6 @@ tasks:
dependencies:
dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale}
train-student: train-student-{src_locale}-{trg_locale}
train-vocab: train-vocab-{src_locale}-{trg_locale}
fetches:
dataset:
- artifact: "{dataset_sanitized}.{src_locale}.zst"
Expand All @@ -213,7 +207,6 @@ tasks:
extract: false
- artifact: final.model.npz.best-{best_model}.npz.decoder.yml
extract: false
train-vocab:
- artifact: vocab.spm
extract: false
toolchain:
Expand All @@ -240,7 +233,6 @@ tasks:
dependencies:
dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale}
finetune-student: finetune-student-{src_locale}-{trg_locale}
train-vocab: train-vocab-{src_locale}-{trg_locale}
fetches:
dataset:
- artifact: "{dataset_sanitized}.{src_locale}.zst"
Expand All @@ -252,7 +244,6 @@ tasks:
extract: false
- artifact: final.model.npz.best-{best_model}.npz.decoder.yml
extract: false
train-vocab:
- artifact: vocab.spm
extract: false
toolchain:
Expand Down
5 changes: 3 additions & 2 deletions taskcluster/ci/finetune-student/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,18 @@ tasks:
pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt &&
export PATH="$HOME/.local/bin:$PATH" &&
export MARIAN=$MOZ_FETCHES_DIR &&
$VCS_PATH/pipeline/train/train.sh
$VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh
student
finetune
{src_locale}
{trg_locale}
$MOZ_FETCHES_DIR/corpus
$MOZ_FETCHES_DIR/devset
$TASK_WORKDIR/artifacts
$MOZ_FETCHES_DIR/vocab.spm
{best_model}
$MOZ_FETCHES_DIR/corpus.aln.zst
None
None
--pretrained-model
$MOZ_FETCHES_DIR/final.model.npz.best-{best_model}.npz
{marian_args}
Expand Down
3 changes: 0 additions & 3 deletions taskcluster/ci/score/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ transforms:

kind-dependencies:
- train-backwards
- train-vocab
- merge-translated
- toolchain

Expand Down Expand Up @@ -83,7 +82,6 @@ tasks:
dependencies:
train-backwards: train-backwards-{src_locale}-{trg_locale}
train-vocab: train-vocab-{src_locale}-{trg_locale}
merge-translated: merge-translated-{src_locale}-{trg_locale}

fetches:
Expand All @@ -92,7 +90,6 @@ tasks:
train-backwards:
- artifact: final.model.npz.best-{best_model}.npz
extract: false
train-vocab:
- artifact: vocab.spm
extract: false
merge-translated:
Expand Down
10 changes: 8 additions & 2 deletions taskcluster/ci/train-backwards/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
loader: taskgraph.loader.transform:loader

transforms:
- translations_taskgraph.transforms.training_continuation:transforms
- translations_taskgraph.transforms.marian_args:transforms
- taskgraph.transforms.task_context
- taskgraph.transforms.job:transforms
Expand All @@ -29,16 +30,20 @@ tasks:
type: train-backwards
resources:
- pipeline/train/train.sh
- taskcluster/scripts/pipeline/train-taskcluster.sh
- pipeline/train/configs/model/backward.yml
- pipeline/train/configs/opustrainer/backward.yml
- pipeline/train/configs/training/backward.train.yml
from-parameters:
marian_args: training_config.marian-args.training-backward
pretrained_backward: training_config.experiment.pretrained-models.train-backwards
task-context:
from-parameters:
best_model: training_config.experiment.best-model
src_locale: training_config.experiment.src
trg_locale: training_config.experiment.trg
pretrained_backward_mode: training_config.experiment.pretrained-models.train-backwards.mode
pretrained_backward_type: training_config.experiment.pretrained-models.train-backwards.type
substitution-fields:
- description
- name
Expand Down Expand Up @@ -81,17 +86,18 @@ tasks:
pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt &&
export PATH="$HOME/.local/bin:$PATH" &&
export MARIAN=$MOZ_FETCHES_DIR &&
$VCS_PATH/pipeline/train/train.sh
$VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh
backward
train
{trg_locale}
{src_locale}
$MOZ_FETCHES_DIR/corpus
$MOZ_FETCHES_DIR/devset
$TASK_WORKDIR/artifacts
$MOZ_FETCHES_DIR/vocab.spm
{best_model}
None
{pretrained_backward_mode}
{pretrained_backward_type}
{marian_args}
dependencies:
Expand Down
5 changes: 3 additions & 2 deletions taskcluster/ci/train-student/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,18 @@ tasks:
pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt &&
export PATH="$HOME/.local/bin:$PATH" &&
export MARIAN=$MOZ_FETCHES_DIR &&
$VCS_PATH/pipeline/train/train.sh
$VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh
student
train
{src_locale}
{trg_locale}
$MOZ_FETCHES_DIR/corpus
$MOZ_FETCHES_DIR/devset
$TASK_WORKDIR/artifacts
$MOZ_FETCHES_DIR/vocab.spm
{best_model}
$MOZ_FETCHES_DIR/corpus.aln.zst
None
None
{marian_args}
dependencies:
Expand Down
10 changes: 8 additions & 2 deletions taskcluster/ci/train-teacher/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
loader: taskgraph.loader.transform:loader

transforms:
- translations_taskgraph.transforms.training_continuation:transforms
- translations_taskgraph.transforms.marian_args:transforms
- taskgraph.transforms.task_context
- translations_taskgraph.transforms.cast_to
Expand Down Expand Up @@ -41,6 +42,8 @@ tasks:
trg_locale: training_config.experiment.trg
best_model: training_config.experiment.best-model
teacher_ensemble: training_config.experiment.teacher-ensemble
pretrained_teacher_mode: training_config.experiment.pretrained-models.train-teacher.mode
pretrained_teacher_type: training_config.experiment.pretrained-models.train-teacher.type
substitution-fields:
- description
- name
Expand All @@ -61,9 +64,11 @@ tasks:
- pipeline/train/configs/opustrainer/teacher.yml
- pipeline/train/configs/training/teacher.train.yml
- pipeline/train/train.sh
- taskcluster/scripts/pipeline/train-taskcluster.sh
from-parameters:
marian_args: training_config.marian-args.training-teacher
teacher-ensemble: training_config.experiment.teacher-ensemble
pretrained_teacher: training_config.experiment.pretrained-models.train-teacher
worker-type: b-linux-v100-gpu-4-1tb
expires-after: "90 days"
worker:
Expand Down Expand Up @@ -99,17 +104,18 @@ tasks:
pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt &&
export PATH="$HOME/.local/bin:$PATH" &&
export MARIAN=$MOZ_FETCHES_DIR &&
$VCS_PATH/pipeline/train/train.sh
$VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh
teacher
train
{src_locale}
{trg_locale}
$MOZ_FETCHES_DIR/corpus,$MOZ_FETCHES_DIR/mono
$MOZ_FETCHES_DIR/devset
$TASK_WORKDIR/artifacts
$MOZ_FETCHES_DIR/vocab.spm
{best_model}
None
{pretrained_teacher_mode}
{pretrained_teacher_type}
{marian_args}
dependencies:
Expand Down
7 changes: 2 additions & 5 deletions taskcluster/ci/translate-corpus/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ transforms:
kind-dependencies:
- split-corpus
- train-teacher
- train-vocab
- toolchain

tasks:
Expand Down Expand Up @@ -59,17 +58,15 @@ tasks:
kinds:
- train-teacher
- split-corpus
- train-vocab
fetches:
split-corpus:
- artifact: src-file.{this_chunk}.zst
extract: true
train-vocab:
- artifact: vocab.spm
extract: false
train-teacher:
- artifact: final.model.npz.best-{best_model}.npz
dest: model{this_chunk}
- artifact: vocab.spm
extract: false

task-context:
from-parameters:
Expand Down
7 changes: 2 additions & 5 deletions taskcluster/ci/translate-mono-src/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ transforms:
kind-dependencies:
- split-mono-src
- train-teacher
- train-vocab
- toolchain

task-defaults:
Expand Down Expand Up @@ -114,14 +113,12 @@ tasks:
kinds:
- train-teacher
- split-mono-src
- train-vocab
fetches:
split-mono-src:
- artifact: out-file.{this_chunk}.zst
extract: true
train-vocab:
- artifact: vocab.spm
extract: false
train-teacher:
- artifact: final.model.npz.best-{best_model}.npz
dest: model{this_chunk}
- artifact: vocab.spm
extract: false
5 changes: 2 additions & 3 deletions taskcluster/ci/translate-mono-trg/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ transforms:
kind-dependencies:
- split-mono-trg
- train-backwards
- train-vocab
- toolchain

task-defaults:
Expand Down Expand Up @@ -84,13 +83,13 @@ task-defaults:
{marian_args}
dependencies:
train-vocab: train-vocab-{src_locale}-{trg_locale}
train-backwards: train-backwards-{src_locale}-{trg_locale}

fetches:
toolchain:
- marian
- cuda-toolkit
train-vocab:
train-backwards:
- artifact: vocab.spm
extract: false

Expand Down
Loading

0 comments on commit 2bfe3a4

Please sign in to comment.