This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

[Numpy Refactor] BART #1282

Merged 37 commits on Jul 30, 2020
Commits
5274e63
init
zheyuye Jul 27, 2020
886b1ac
fix convert roberta
zheyuye Jul 27, 2020
f6d0d35
rename TransformerNMTModel as TransformerModel
zheyuye Jul 27, 2020
db7d8db
update bart
zheyuye Jul 27, 2020
01d9945
fix
zheyuye Jul 27, 2020
dc31a44
fix
zheyuye Jul 27, 2020
5807642
update init
zheyuye Jul 27, 2020
6bb255e
add layernorm_embedding for transformer
zheyuye Jul 27, 2020
41be41e
convert script
zheyuye Jul 27, 2020
5fa5108
encoder
zheyuye Jul 27, 2020
a06293d
fix
zheyuye Jul 27, 2020
8a112d5
fix vocab
zheyuye Jul 27, 2020
6f8335a
fix roberta
zheyuye Jul 27, 2020
dc1e79b
fix
zheyuye Jul 28, 2020
52121c3
fix electra
zheyuye Jul 28, 2020
be3b2d9
add conversion bash for roberta and xlmr
zheyuye Jul 28, 2020
4afc84f
ELECTRA SETUP
zheyuye Jul 28, 2020
179ee73
convert bart decoder
zheyuye Jul 28, 2020
4c54128
fix
zheyuye Jul 28, 2020
cf1e20d
update
zheyuye Jul 28, 2020
a44b03a
testing output
zheyuye Jul 28, 2020
041bf8d
remove arange_like for embeddings
zheyuye Jul 28, 2020
b47ca86
fix
zheyuye Jul 29, 2020
917dcce
update
zheyuye Jul 29, 2020
0bda11d
use_pooler for bart
zheyuye Jul 29, 2020
e3c61f3
fix
zheyuye Jul 29, 2020
a5a91e0
upload params for bart
zheyuye Jul 29, 2020
3366cf3
add test_models_bart
zheyuye Jul 29, 2020
6c62a29
Merge remote-tracking branch 'upstream/numpy' into bart
zheyuye Jul 29, 2020
5bab516
fix cfg
zheyuye Jul 29, 2020
1f75b26
test bart
zheyuye Jul 29, 2020
e49fbe1
update
zheyuye Jul 29, 2020
d9c4140
fix transformer
zheyuye Jul 29, 2020
a53b9f4
Squashed commit of the following:
zheyuye Jul 29, 2020
cab6282
Squashed commit of the following:
zheyuye Jul 30, 2020
e9db27c
fix comment
zheyuye Jul 30, 2020
6512f9d
use xavier for embedding initializer
zheyuye Jul 30, 2020
55 changes: 32 additions & 23 deletions scripts/conversion_toolkits/README.md
@@ -12,6 +12,8 @@ The testing step mentioned above are controlled by the flag `--test`, in which t
tolerance of 1e-3 between gluon model with converted weights and original tensorflow model.
In addition, we can use GPU in all converting scripts by adding `--gpu 0`.

For the RoBERTa, XLM-R and BART models, please install the [fairseq](https://github.com/pytorch/fairseq#requirements-and-installation) package locally before conversion.
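
As a rough sketch (the linked fairseq page is the authoritative guide), a local source install could look like this:

```bash
# Illustrative only -- follow the fairseq installation instructions for the exact steps
git clone https://github.com/pytorch/fairseq
cd fairseq
pip install --editable .
```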

## BERT
Convert model from [BERT LIST](https://tfhub.dev/google/collections/bert/1).

@@ -37,25 +39,42 @@ do
done
```

## RoBERTa
## ELECTRA
The TF Hub does not currently provide the ELECTRA model.
Thus, you will need to clone the [electra repository](https://github.com/ZheyuYe/electra)
and download the checkpoints; the parameters are converted from these local checkpoints.
By running the following command, you can convert and verify the ELECTRA model with both the discriminator and the generator.

Notice: please set `--electra_path` to the path of the cloned repository ~~or get this electra repository packaged by `pip install -e .`.~~

```bash
# Need to use TF 1.13.2 to use contrib layer
pip uninstall tensorflow
pip install tensorflow==1.13.2

# Actual conversion
bash convert_electra.sh
```
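
For reference, a direct call to `convert_electra.py` (the script edited later in this diff) with an explicit `--electra_path` might look like the sketch below; the checkpoint directory and model size are placeholders, and the flag names are inferred from the script's arguments:

```bash
# Sketch only: paths are placeholders; verify the flag names against convert_electra.py
git clone https://github.com/ZheyuYe/electra
python convert_electra.py \
    --tf_model_path electra_base_checkpoint \
    --model_size base \
    --electra_path ./electra \
    --test
```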

## Mobile Bert
```bash
pip install fairseq==0.9.0
bash convert_mobilebert.sh
```

## RoBERTa
```bash
for model in base large
do
mkdir roberta_${model}
wget "https://dl.fbaipublicfiles.com/fairseq/models/roberta.${model}.tar.gz"
tar zxf roberta.${model}.tar.gz --directory roberta_${model}
python convert_fairseq_roberta.py --fairseq_model_path roberta_${model}/roberta.${model} --model_size ${model} --test
python convert_fairseq_roberta.py --fairseq_model_path roberta_${model}/roberta.${model} --test
done
```

## XLM-R

```bash
pip install fairseq==0.9.0

for model in base large
do
mkdir xlmr_${model}
@@ -65,23 +84,13 @@ do
done
```

## ELECTRA
The TF Hub is not available for ELECTRA model currently.
Thus, you will need to clone the [electra repository](https://github.com/ZheyuYe/electra)
and download the checkpoint. The parameters are converted from local checkpoints.
By running the following command, you can convert + verify the ELECTRA model with both the discriminator and the generator.

Notice: pleas set up the `--electra_path` with the cloned path or get this electra repository packaged by `pip install -e .`.

## BART
```bash
# Need to use TF 1.13.2 to use contrib layer
pip install tensorflow==1.13.2 --upgrade --force-reinstall

# Actual conversion
bash convert_electra.sh
```

## Mobile Bert
```bash
bash convert_mobilebert.sh
for model in base large
do
mkdir bart_${model}
wget "https://dl.fbaipublicfiles.com/fairseq/models/bart.${model}.tar.gz"
tar zxf bart.${model}.tar.gz --directory bart_${model}
python convert_fairseq_bart.py --fairseq_model_path bart_${model}/bart.${model} --test
done
```
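
Per the note at the top of this README, all converting scripts accept `--gpu 0` to run on a GPU. A single-model BART conversion on GPU might then look like this sketch, which simply unrolls one iteration of the loop above:

```bash
# Sketch: convert only bart.base and run the verification on GPU 0
mkdir bart_base
wget "https://dl.fbaipublicfiles.com/fairseq/models/bart.base.tar.gz"
tar zxf bart.base.tar.gz --directory bart_base
python convert_fairseq_bart.py --fairseq_model_path bart_base/bart.base --gpu 0 --test
```
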
17 changes: 10 additions & 7 deletions scripts/conversion_toolkits/convert_electra.py
@@ -53,7 +53,9 @@ def read_tf_checkpoint(path):
    return tensors


def get_dict_config(model_size, electra_dir):
def get_dict_config(model_size, electra_path):
    sys.path.append(electra_path)
    electra_dir = os.path.abspath(os.path.join(os.path.dirname(electra_path), os.path.pardir))
    sys.path.append(electra_dir)
    from electra.util.training_utils import get_bert_config
    from electra.configure_pretraining import PretrainingConfig
@@ -100,7 +102,7 @@ def convert_tf_config(config_dict, vocab_size):
    return cfg


def convert_tf_assets(tf_assets_dir, model_size, electra_dir):
def convert_tf_assets(tf_assets_dir, model_size, electra_path):
    """Convert the assets file including config, vocab and tokenizer model"""
    file_names = os.listdir(tf_assets_dir)
    vocab_path = None
@@ -113,7 +115,7 @@ def convert_tf_assets(tf_assets_dir, model_size, electra_dir):
    if vocab_path:
        vocab_path = os.path.join(tf_assets_dir, vocab_path)
        vocab_size = len(open(vocab_path, 'rU').readlines())
    config_dict = get_dict_config(model_size, electra_dir)
    config_dict = get_dict_config(model_size, electra_path)
    cfg = convert_tf_config(config_dict, vocab_size)
    return cfg, vocab_path

@@ -190,12 +192,12 @@ def get_name_map(tf_names, convert_type='backbone'):
    return name_map


def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, electra_dir):
def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, electra_path):
    ctx = mx.gpu(gpu) if gpu is not None else mx.cpu()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    cfg, vocab_path = convert_tf_assets(model_dir, model_size, electra_dir)
    cfg, vocab_path = convert_tf_assets(model_dir, model_size, electra_path)
    with open(os.path.join(save_dir, 'model.yml'), 'w') as of:
        of.write(cfg.dump())
    new_vocab = HuggingFaceWordPieceTokenizer(
@@ -234,6 +236,8 @@ def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, elec
    tf_names = list(tf_names)

    # reload the electra module for this local scope
    sys.path.append(electra_path)
    electra_dir = os.path.abspath(os.path.join(os.path.dirname(electra_path), os.path.pardir))
    sys.path.append(electra_dir)
    from electra.util.training_utils import get_bert_config
    from electra.configure_pretraining import PretrainingConfig
@@ -426,11 +430,10 @@ def convert_qkv_weights(tf_prefix, mx_prefix):
    logging_config()
    save_dir = args.save_dir if args.save_dir is not None else os.path.basename(
        args.tf_model_path) + '_gluon'
    electra_dir = os.path.abspath(os.path.join(os.path.dirname(args.electra_path), os.path.pardir))
    convert_tf_model(
        args.tf_model_path,
        save_dir,
        args.test,
        args.model_size,
        args.gpu,
        electra_dir)
        args.electra_path)