diff --git a/taskcluster/ci/evaluate-teacher-ensemble/kind.yml b/taskcluster/ci/evaluate-teacher-ensemble/kind.yml index cc48c0515..19321a3dc 100644 --- a/taskcluster/ci/evaluate-teacher-ensemble/kind.yml +++ b/taskcluster/ci/evaluate-teacher-ensemble/kind.yml @@ -16,7 +16,6 @@ transforms: kind-dependencies: - dataset - train-teacher - - train-vocab - alignments - toolchain @@ -110,13 +109,11 @@ tasks: unique-kinds: false kinds: - train-teacher - - train-vocab fetches: train-teacher: - artifact: final.model.npz.best-{best_model}.npz dest: model{this_chunk} - artifact: final.model.npz.best-chrf.npz.decoder.yml - train-vocab: - artifact: vocab.spm extract: false diff --git a/taskcluster/ci/evaluate/kind.yml b/taskcluster/ci/evaluate/kind.yml index b05c7ad9f..2f383267d 100644 --- a/taskcluster/ci/evaluate/kind.yml +++ b/taskcluster/ci/evaluate/kind.yml @@ -18,7 +18,6 @@ kind-dependencies: - dataset - train-backwards - train-teacher - - train-vocab - train-student - finetune-student - alignments @@ -112,7 +111,6 @@ tasks: dependencies: dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale} train-backwards: train-backwards-{src_locale}-{trg_locale} - train-vocab: train-vocab-{src_locale}-{trg_locale} fetches: dataset: - artifact: "{dataset_sanitized}.{src_locale}.zst" @@ -124,7 +122,6 @@ tasks: extract: false - artifact: final.model.npz.best-{best_model}.npz.decoder.yml extract: false - train-vocab: - artifact: vocab.spm extract: false toolchain: @@ -162,7 +159,6 @@ tasks: dependencies: dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale} train-teacher: train-teacher-{src_locale}-{trg_locale}-{this_chunk}/{total_chunks} - train-vocab: train-vocab-{src_locale}-{trg_locale} fetches: dataset: - artifact: "{dataset_sanitized}.{src_locale}.zst" @@ -174,7 +170,6 @@ tasks: extract: false - artifact: final.model.npz.best-{best_model}.npz.decoder.yml extract: false - train-vocab: - artifact: vocab.spm extract: false toolchain: @@ -201,7 
+196,6 @@ tasks: dependencies: dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale} train-student: train-student-{src_locale}-{trg_locale} - train-vocab: train-vocab-{src_locale}-{trg_locale} fetches: dataset: - artifact: "{dataset_sanitized}.{src_locale}.zst" @@ -213,7 +207,6 @@ tasks: extract: false - artifact: final.model.npz.best-{best_model}.npz.decoder.yml extract: false - train-vocab: - artifact: vocab.spm extract: false toolchain: @@ -240,7 +233,6 @@ tasks: dependencies: dataset: dataset-{provider}-{dataset_sanitized}-{src_locale}-{trg_locale} finetune-student: finetune-student-{src_locale}-{trg_locale} - train-vocab: train-vocab-{src_locale}-{trg_locale} fetches: dataset: - artifact: "{dataset_sanitized}.{src_locale}.zst" @@ -252,7 +244,6 @@ tasks: extract: false - artifact: final.model.npz.best-{best_model}.npz.decoder.yml extract: false - train-vocab: - artifact: vocab.spm extract: false toolchain: diff --git a/taskcluster/ci/finetune-student/kind.yml b/taskcluster/ci/finetune-student/kind.yml index 85a74e820..a4ea43ef1 100644 --- a/taskcluster/ci/finetune-student/kind.yml +++ b/taskcluster/ci/finetune-student/kind.yml @@ -84,7 +84,7 @@ tasks: pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/pipeline/train/train.sh + $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh student finetune {src_locale} @@ -92,9 +92,10 @@ tasks: $MOZ_FETCHES_DIR/corpus $MOZ_FETCHES_DIR/devset $TASK_WORKDIR/artifacts - $MOZ_FETCHES_DIR/vocab.spm {best_model} $MOZ_FETCHES_DIR/corpus.aln.zst + None + None --pretrained-model $MOZ_FETCHES_DIR/final.model.npz.best-{best_model}.npz {marian_args} diff --git a/taskcluster/ci/score/kind.yml b/taskcluster/ci/score/kind.yml index c80ea5aff..e2602e638 100644 --- a/taskcluster/ci/score/kind.yml +++ b/taskcluster/ci/score/kind.yml @@ -13,7 +13,6 @@ transforms: kind-dependencies: - train-backwards - - 
train-vocab - merge-translated - toolchain @@ -83,7 +82,6 @@ tasks: dependencies: train-backwards: train-backwards-{src_locale}-{trg_locale} - train-vocab: train-vocab-{src_locale}-{trg_locale} merge-translated: merge-translated-{src_locale}-{trg_locale} fetches: @@ -92,7 +90,6 @@ tasks: train-backwards: - artifact: final.model.npz.best-{best_model}.npz extract: false - train-vocab: - artifact: vocab.spm extract: false merge-translated: diff --git a/taskcluster/ci/train-backwards/kind.yml b/taskcluster/ci/train-backwards/kind.yml index a9b12847c..1ecb86fd0 100644 --- a/taskcluster/ci/train-backwards/kind.yml +++ b/taskcluster/ci/train-backwards/kind.yml @@ -6,6 +6,7 @@ loader: taskgraph.loader.transform:loader transforms: + - translations_taskgraph.transforms.training_continuation:transforms - translations_taskgraph.transforms.marian_args:transforms - taskgraph.transforms.task_context - taskgraph.transforms.job:transforms @@ -29,16 +30,20 @@ tasks: type: train-backwards resources: - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train-taskcluster.sh - pipeline/train/configs/model/backward.yml - pipeline/train/configs/opustrainer/backward.yml - pipeline/train/configs/training/backward.train.yml from-parameters: marian_args: training_config.marian-args.training-backward + pretrained_backward: training_config.experiment.pretrained-models.train-backwards task-context: from-parameters: best_model: training_config.experiment.best-model src_locale: training_config.experiment.src trg_locale: training_config.experiment.trg + pretrained_backward_mode: training_config.experiment.pretrained-models.train-backwards.mode + pretrained_backward_type: training_config.experiment.pretrained-models.train-backwards.type substitution-fields: - description - name @@ -81,7 +86,7 @@ tasks: pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/pipeline/train/train.sh + 
$VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh backward train {trg_locale} @@ -89,9 +94,10 @@ tasks: $MOZ_FETCHES_DIR/corpus $MOZ_FETCHES_DIR/devset $TASK_WORKDIR/artifacts - $MOZ_FETCHES_DIR/vocab.spm {best_model} None + {pretrained_backward_mode} + {pretrained_backward_type} {marian_args} dependencies: diff --git a/taskcluster/ci/train-student/kind.yml b/taskcluster/ci/train-student/kind.yml index 0e24b3933..168cfa88b 100644 --- a/taskcluster/ci/train-student/kind.yml +++ b/taskcluster/ci/train-student/kind.yml @@ -81,7 +81,7 @@ tasks: pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/pipeline/train/train.sh + $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh student train {src_locale} @@ -89,9 +89,10 @@ tasks: $MOZ_FETCHES_DIR/corpus $MOZ_FETCHES_DIR/devset $TASK_WORKDIR/artifacts - $MOZ_FETCHES_DIR/vocab.spm {best_model} $MOZ_FETCHES_DIR/corpus.aln.zst + None + None {marian_args} dependencies: diff --git a/taskcluster/ci/train-teacher/kind.yml b/taskcluster/ci/train-teacher/kind.yml index 7d5dda039..a403e99ae 100644 --- a/taskcluster/ci/train-teacher/kind.yml +++ b/taskcluster/ci/train-teacher/kind.yml @@ -6,6 +6,7 @@ loader: taskgraph.loader.transform:loader transforms: + - translations_taskgraph.transforms.training_continuation:transforms - translations_taskgraph.transforms.marian_args:transforms - taskgraph.transforms.task_context - translations_taskgraph.transforms.cast_to @@ -41,6 +42,8 @@ tasks: trg_locale: training_config.experiment.trg best_model: training_config.experiment.best-model teacher_ensemble: training_config.experiment.teacher-ensemble + pretrained_teacher_mode: training_config.experiment.pretrained-models.train-teacher.mode + pretrained_teacher_type: training_config.experiment.pretrained-models.train-teacher.type substitution-fields: - description - name @@ -61,9 +64,11 @@ tasks: - 
pipeline/train/configs/opustrainer/teacher.yml - pipeline/train/configs/training/teacher.train.yml - pipeline/train/train.sh + - taskcluster/scripts/pipeline/train-taskcluster.sh from-parameters: marian_args: training_config.marian-args.training-teacher teacher-ensemble: training_config.experiment.teacher-ensemble + pretrained_teacher: training_config.experiment.pretrained-models.train-teacher worker-type: b-linux-v100-gpu-4-1tb expires-after: "90 days" worker: @@ -99,7 +104,7 @@ tasks: pip3 install -r $VCS_PATH/pipeline/train/requirements/train.txt && export PATH="$HOME/.local/bin:$PATH" && export MARIAN=$MOZ_FETCHES_DIR && - $VCS_PATH/pipeline/train/train.sh + $VCS_PATH/taskcluster/scripts/pipeline/train-taskcluster.sh teacher train {src_locale} @@ -107,9 +112,10 @@ tasks: $MOZ_FETCHES_DIR/corpus,$MOZ_FETCHES_DIR/mono $MOZ_FETCHES_DIR/devset $TASK_WORKDIR/artifacts - $MOZ_FETCHES_DIR/vocab.spm {best_model} None + {pretrained_teacher_mode} + {pretrained_teacher_type} {marian_args} dependencies: diff --git a/taskcluster/ci/translate-corpus/kind.yml b/taskcluster/ci/translate-corpus/kind.yml index 61409576d..95bdf3ed7 100644 --- a/taskcluster/ci/translate-corpus/kind.yml +++ b/taskcluster/ci/translate-corpus/kind.yml @@ -17,7 +17,6 @@ transforms: kind-dependencies: - split-corpus - train-teacher - - train-vocab - toolchain tasks: @@ -59,17 +58,15 @@ tasks: kinds: - train-teacher - split-corpus - - train-vocab fetches: split-corpus: - artifact: src-file.{this_chunk}.zst extract: true - train-vocab: - - artifact: vocab.spm - extract: false train-teacher: - artifact: final.model.npz.best-{best_model}.npz dest: model{this_chunk} + - artifact: vocab.spm + extract: false task-context: from-parameters: diff --git a/taskcluster/ci/translate-mono-src/kind.yml b/taskcluster/ci/translate-mono-src/kind.yml index d4d1ef2fc..d96381b12 100644 --- a/taskcluster/ci/translate-mono-src/kind.yml +++ b/taskcluster/ci/translate-mono-src/kind.yml @@ -16,7 +16,6 @@ transforms: 
kind-dependencies: - split-mono-src - train-teacher - - train-vocab - toolchain task-defaults: @@ -114,14 +113,12 @@ tasks: kinds: - train-teacher - split-mono-src - - train-vocab fetches: split-mono-src: - artifact: out-file.{this_chunk}.zst extract: true - train-vocab: - - artifact: vocab.spm - extract: false train-teacher: - artifact: final.model.npz.best-{best_model}.npz dest: model{this_chunk} + - artifact: vocab.spm + extract: false diff --git a/taskcluster/ci/translate-mono-trg/kind.yml b/taskcluster/ci/translate-mono-trg/kind.yml index 9ec38a531..d32440037 100644 --- a/taskcluster/ci/translate-mono-trg/kind.yml +++ b/taskcluster/ci/translate-mono-trg/kind.yml @@ -16,7 +16,6 @@ transforms: kind-dependencies: - split-mono-trg - train-backwards - - train-vocab - toolchain task-defaults: @@ -84,13 +83,13 @@ task-defaults: {marian_args} dependencies: - train-vocab: train-vocab-{src_locale}-{trg_locale} + train-backwards: train-backwards-{src_locale}-{trg_locale} fetches: toolchain: - marian - cuda-toolkit - train-vocab: + train-backwards: - artifact: vocab.spm extract: false diff --git a/taskcluster/scripts/pipeline/train-taskcluster.sh b/taskcluster/scripts/pipeline/train-taskcluster.sh new file mode 100755 index 000000000..e4e303d1e --- /dev/null +++ b/taskcluster/scripts/pipeline/train-taskcluster.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +set -x +set -euo pipefail + +[[ -v MOZ_FETCHES_DIR ]] || { echo "MOZ_FETCHES_DIR is not set"; exit 1; } + +pushd `dirname $0`/../../.. 
&>/dev/null +VCS_ROOT=$(pwd) +popd &>/dev/null + +if [ "$#" -lt 11 ]; then + echo "Usage: $0 <model_type> <training_type> <src> <trg> <train_set_prefix> <valid_set_prefix> <model_dir> <best_model_metric> <alignments> <pretrained_model_mode> <pretrained_model_type> [extra_params...]" + exit 1 +fi + +model_type=$1 +training_type=$2 +src=$3 +trg=$4 +train_set_prefix=$5 +valid_set_prefix=$6 +model_dir=$7 +best_model_metric=$8 +alignments=$9 +pretrained_model_mode=${10} +pretrained_model_type=${11} +extra_params=( "${@:12}" ) + +if [ "$pretrained_model_mode" == "None" ]; then + vocab="$MOZ_FETCHES_DIR/vocab.spm" +else + vocab="$TASK_WORKDIR/artifacts/vocab.spm" +fi + +export MARIAN=$MOZ_FETCHES_DIR + +case "$pretrained_model_mode" in + "use") + echo "The training mode is 'use', using existing model without further training." + exit 0 + ;; + "continue"|"init"|"None") + if [ "$pretrained_model_mode" == "init" ]; then + extra_params+=("--pretrained-model" "$TASK_WORKDIR/artifacts/final.model.npz.best-$best_model_metric.npz" "--no-restore-corpus") + fi + $VCS_ROOT/pipeline/train/train.sh \ + "$model_type" \ + "$training_type" \ + "$src" \ + "$trg" \ + "$train_set_prefix" \ + "$valid_set_prefix" \ + "$model_dir" \ + "$vocab" \ + "$best_model_metric" \ + "$alignments" \ + "${extra_params[@]}" + if [ "$pretrained_model_mode" == "None" ]; then + cp "$vocab" "$model_dir" + fi + ;; +esac diff --git a/taskcluster/translations_taskgraph/actions/train.py b/taskcluster/translations_taskgraph/actions/train.py index abf82a29a..05e87daa1 100644 --- a/taskcluster/translations_taskgraph/actions/train.py +++ b/taskcluster/translations_taskgraph/actions/train.py @@ -21,6 +21,27 @@ def can_train(parameters): defaults = get_defaults("")["training_config"] +def validate_pretrained_models(params): + pretrained_models = params["training_config"]["experiment"].get("pretrained-models", {}) + train_teacher = pretrained_models.get("train-teacher") + if train_teacher: + teacher_ensemble = params["training_config"]["experiment"]["teacher-ensemble"] + if len(train_teacher["urls"]) != teacher_ensemble: + raise Exception( + f"The experiment's 'teacher-ensemble' 
({teacher_ensemble}) " + f"does not match the number of provided model 'urls' ({len(train_teacher['urls'])}) " + f"for the pretrained 'train-teacher' ensemble." + ) + train_backwards = pretrained_models.get("train-backwards") + if train_backwards: + if len(train_backwards["urls"]) != 1: + raise Exception( + f"The experiment's 'pretrained-models.backward.urls' ({len(train_backwards['urls'])}) " + f"must be equal to one (1). " + f"The pipeline's backward model is _not_ an ensemble." + ) + + @register_callback_action( name="train", title="Train", @@ -96,14 +117,6 @@ def can_train(parameters): "type": "number", "description": "Number of teachers to train", }, - "backward-model": { - "type": "string", - "description": "???", - }, - "vocab": { - "type": "string", - "description": "???", - }, "mono-max-sentences-src": { "type": "number", "description": "limits per downloaded src dataset", @@ -149,6 +162,53 @@ def can_train(parameters): "default-threshold", ], }, + # We are using urls because pretrained-models should be flexible enough + # to point at model (ensembles) that are not in taskcluster. + # Models could be in a long-term storage bucket, or we may use + # pretrained models hosted elsewhere. 
+ "pretrained-models": { + "type": "object", + "properties": { + "train-teacher": { + "type": "object", + "properties": { + "urls": { + "type": "array", + "items": {"type": "string", "format": "uri"}, + "minItems": 1, + }, + "mode": { + "type": "string", + "enum": ["continue", "init", "use"], + }, + "type": { + "type": "string", + "enum": ["default", "opusmt"], + }, + }, + "required": ["urls", "mode", "type"], + }, + "train-backwards": { + "type": "object", + "properties": { + "urls": { + "type": "array", + "items": {"type": "string", "format": "uri"}, + "minItems": 1, + }, + "mode": { + "type": "string", + "enum": ["continue", "init", "use"], + }, + "type": { + "type": "string", + "enum": ["default", "opusmt"], + }, + }, + "required": ["urls", "mode", "type"], + }, + }, + }, }, "required": [ "name", @@ -291,5 +351,7 @@ def train_action(parameters, graph_config, input, task_group_id, task_id): parameters["tasks_for"] = "action" parameters["training_config"] = input + validate_pretrained_models(parameters) + parameters = Parameters(**parameters) taskgraph_decision({"root": graph_config.root_dir}, parameters=parameters) diff --git a/taskcluster/translations_taskgraph/parameters.py b/taskcluster/translations_taskgraph/parameters.py index aa4ac500f..965a48043 100644 --- a/taskcluster/translations_taskgraph/parameters.py +++ b/taskcluster/translations_taskgraph/parameters.py @@ -20,10 +20,6 @@ def get_defaults(_): "src": "ru", "trg": "en", "teacher-ensemble": 1, - # Used for providing a pretrained backward model. We do not support this yet. - "backward-model": "NOT-YET-SUPPORTED", - # Used for providing a pretrained vocab. We do not support this yet. 
- "vocab": "NOT-YET-SUPPORTED", "mono-max-sentences-trg": 10000, "mono-max-sentences-src": 10000, "split-length": 5000, @@ -124,8 +120,6 @@ def get_defaults(_): Required("src"): str, Required("trg"): str, Required("teacher-ensemble"): int, - Required("backward-model"): str, - Required("vocab"): str, Required("mono-max-sentences-trg"): int, Required("mono-max-sentences-src"): int, Required("split-length"): int, @@ -139,6 +133,18 @@ def get_defaults(_): str: float, }, }, + Optional("pretrained-models"): { + Optional("train-teacher"): { + Required("urls"): [str], + Required("mode"): str, + Required("type"): str, + }, + Optional("train-backwards"): { + Required("urls"): [str], + Required("mode"): str, + Required("type"): str, + }, + }, }, Optional("datasets"): { str: [str], diff --git a/taskcluster/translations_taskgraph/transforms/training_continuation.py b/taskcluster/translations_taskgraph/transforms/training_continuation.py new file mode 100644 index 000000000..588ecefab --- /dev/null +++ b/taskcluster/translations_taskgraph/transforms/training_continuation.py @@ -0,0 +1,78 @@ +from taskgraph.transforms.base import TransformSequence +from urllib.parse import urljoin +import os + +CONTINUE_TRAINING_ARTIFACTS = ( + "devset.out", + "model.npz", + "model.npz.best-bleu-detok.npz", + "model.npz.best-bleu-detok.npz.decoder.yml", + "model.npz.best-ce-mean-words.npz", + "model.npz.best-ce-mean-words.npz.decoder.yml", + "final.model.npz.best-chrf.npz", + "model.npz.best-chrf.npz", + "final.model.npz.best-chrf.npz.decoder.yml", + "model.npz.best-chrf.npz.decoder.yml", + "model.npz.decoder.yml", + "model.npz.optimizer.npz", + "model.npz.progress.yml", + "model.npz.yml", + "train.log", + "valid.log", + "vocab.spm", +) + +INITIALIZE_MODEL_ARTIFACTS = ( + "model.npz.best-bleu-detok.npz", + "model.npz.best-ce-mean-words.npz", + "final.model.npz.best-chrf.npz", + "model.npz.best-chrf.npz", +) + + +def get_artifact_mount(url, directory, artifact_name): + normalized_url = f"{url}/" 
if not url.endswith("/") else url + artifact_url = urljoin(normalized_url, artifact_name) + return { + "content": { + "url": artifact_url, + }, + "file": os.path.join(directory, artifact_name), + } + + +def get_artifact_mounts(urls, directory, artifact_names): + for url in urls: + artifact_mounts = [] + for artifact_name in artifact_names: + artifact_mounts.append(get_artifact_mount(url, directory, artifact_name)) + yield artifact_mounts + + +transforms = TransformSequence() + + +@transforms.add +def add_pretrained_model_mounts(config, jobs): + pretrained_models = config.params["training_config"]["experiment"].get("pretrained-models", {}) + for job in jobs: + pretrained_models_training_artifact_mounts = { + pretrained_model: get_artifact_mounts( + pretrained_models[pretrained_model]["urls"], + "./artifacts", + INITIALIZE_MODEL_ARTIFACTS + if pretrained_models[pretrained_model]["mode"] == "init" + else CONTINUE_TRAINING_ARTIFACTS, + ) + for pretrained_model in pretrained_models + } + pretrained_model_training_artifact_mounts = next( + pretrained_models_training_artifact_mounts.get(config.kind, iter((None,))) + ) + if pretrained_model_training_artifact_mounts: + mounts = job["worker"].get("mounts", []) + mounts.extend(pretrained_model_training_artifact_mounts) + job["worker"]["mounts"] = mounts + job["dependencies"].pop("train-vocab") + job["fetches"].pop("train-vocab") + yield job