This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

[Numpy Refactor] BART #1282

Merged 37 commits on Jul 30, 2020
Commits
5274e63
init
zheyuye Jul 27, 2020
886b1ac
fix convert roberta
zheyuye Jul 27, 2020
f6d0d35
rename TransformerNMTModel as TransformerModel
zheyuye Jul 27, 2020
db7d8db
update bart
zheyuye Jul 27, 2020
01d9945
fix
zheyuye Jul 27, 2020
dc31a44
fix
zheyuye Jul 27, 2020
5807642
update init
zheyuye Jul 27, 2020
6bb255e
add layernorm_embedding for transformer
zheyuye Jul 27, 2020
41be41e
convert script
zheyuye Jul 27, 2020
5fa5108
encoder
zheyuye Jul 27, 2020
a06293d
fix
zheyuye Jul 27, 2020
8a112d5
fix vocab
zheyuye Jul 27, 2020
6f8335a
fix roberta
zheyuye Jul 27, 2020
dc1e79b
fix
zheyuye Jul 28, 2020
52121c3
fix electra
zheyuye Jul 28, 2020
be3b2d9
add conversion bash for roberta and xlmr
zheyuye Jul 28, 2020
4afc84f
ELECTRA SETUP
zheyuye Jul 28, 2020
179ee73
convert bart decoder
zheyuye Jul 28, 2020
4c54128
fix
zheyuye Jul 28, 2020
cf1e20d
update
zheyuye Jul 28, 2020
a44b03a
testing output
zheyuye Jul 28, 2020
041bf8d
remove arange_like for embeddings
zheyuye Jul 28, 2020
b47ca86
fix
zheyuye Jul 29, 2020
917dcce
update
zheyuye Jul 29, 2020
0bda11d
use_pooler for bart
zheyuye Jul 29, 2020
e3c61f3
fix
zheyuye Jul 29, 2020
a5a91e0
upload params for bart
zheyuye Jul 29, 2020
3366cf3
add test_models_bart
zheyuye Jul 29, 2020
6c62a29
Merge remote-tracking branch 'upstream/numpy' into bart
zheyuye Jul 29, 2020
5bab516
fix cfg
zheyuye Jul 29, 2020
1f75b26
test bart
zheyuye Jul 29, 2020
e49fbe1
update
zheyuye Jul 29, 2020
d9c4140
fix transformer
zheyuye Jul 29, 2020
a53b9f4
Squashed commit of the following:
zheyuye Jul 29, 2020
cab6282
Squashed commit of the following:
zheyuye Jul 30, 2020
e9db27c
fix comment
zheyuye Jul 30, 2020
6512f9d
use xavier for embedding initializer
zheyuye Jul 30, 2020
55 changes: 32 additions & 23 deletions scripts/conversion_toolkits/README.md
@@ -12,6 +12,8 @@ The testing step mentioned above are controlled by the flag `--test`, in which t
tolerance of 1e-3 between gluon model with converted weights and original tensorflow model.
In addition, we can use GPU in all converting scripts by adding `--gpu 0`.

For the RoBERTa, XLM-R and BART models, please install the [fairseq](https://github.com/pytorch/fairseq#requirements-and-installation) package locally before conversion.
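
As a rough sketch (the linked fairseq page is the authoritative guide), a local source install could look like this:

```bash
# Illustrative only -- follow the fairseq installation instructions for the exact steps
git clone https://github.com/pytorch/fairseq
cd fairseq
pip install --editable .
```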

## BERT
Convert model from [BERT LIST](https://tfhub.dev/google/collections/bert/1).

@@ -37,25 +39,42 @@ do
done
```

## RoBERTa
## ELECTRA
The TF Hub does not currently provide the ELECTRA model.
Thus, you will need to clone the [electra repository](https://github.com/ZheyuYe/electra)
and download the checkpoints; the parameters are converted from these local checkpoints.
By running the following command, you can convert and verify the ELECTRA model with both the discriminator and the generator.

Notice: please set `--electra_path` to the path of the cloned repository ~~or get this electra repository packaged by `pip install -e .`.~~

```bash
# Need to use TF 1.13.2 to use contrib layer
pip uninstall tensorflow
pip install tensorflow==1.13.2

# Actual conversion
bash convert_electra.sh
```
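
For reference, a direct call to `convert_electra.py` (the script edited later in this diff) with an explicit `--electra_path` might look like the sketch below; the checkpoint directory and model size are placeholders, and the flag names are inferred from the script's arguments:

```bash
# Sketch only: paths are placeholders; verify the flag names against convert_electra.py
git clone https://github.com/ZheyuYe/electra
python convert_electra.py \
    --tf_model_path electra_base_checkpoint \
    --model_size base \
    --electra_path ./electra \
    --test
```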

## Mobile Bert
```bash
pip install fairseq==0.9.0
bash convert_mobilebert.sh
```

## RoBERTa
```bash
for model in base large
do
mkdir roberta_${model}
wget "https://dl.fbaipublicfiles.com/fairseq/models/roberta.${model}.tar.gz"
tar zxf roberta.${model}.tar.gz --directory roberta_${model}
python convert_fairseq_roberta.py --fairseq_model_path roberta_${model}/roberta.${model} --model_size ${model} --test
python convert_fairseq_roberta.py --fairseq_model_path roberta_${model}/roberta.${model} --test
done
```

## XLM-R

```bash
pip install fairseq==0.9.0

for model in base large
do
mkdir xlmr_${model}
@@ -65,23 +84,13 @@ do
done
```

## ELECTRA
The TF Hub is not available for ELECTRA model currently.
Thus, you will need to clone the [electra repository](https://github.com/ZheyuYe/electra)
and download the checkpoint. The parameters are converted from local checkpoints.
By running the following command, you can convert + verify the ELECTRA model with both the discriminator and the generator.

Notice: pleas set up the `--electra_path` with the cloned path or get this electra repository packaged by `pip install -e .`.

## BART
```bash
# Need to use TF 1.13.2 to use contrib layer
pip install tensorflow==1.13.2 --upgrade --force-reinstall

# Actual conversion
bash convert_electra.sh
```

## Mobile Bert
```bash
bash convert_mobilebert.sh
for model in base large
do
mkdir bart_${model}
wget "https://dl.fbaipublicfiles.com/fairseq/models/bart.${model}.tar.gz"
tar zxf bart.${model}.tar.gz --directory bart_${model}
python convert_fairseq_bart.py --fairseq_model_path bart_${model}/bart.${model} --test
done
```
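
Per the note at the top of this README, all converting scripts accept `--gpu 0` to run on a GPU. A single-model BART conversion on GPU might then look like this sketch, which simply unrolls one iteration of the loop above:

```bash
# Sketch: convert only bart.base and run the verification on GPU 0
mkdir bart_base
wget "https://dl.fbaipublicfiles.com/fairseq/models/bart.base.tar.gz"
tar zxf bart.base.tar.gz --directory bart_base
python convert_fairseq_bart.py --fairseq_model_path bart_base/bart.base --gpu 0 --test
```
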
17 changes: 10 additions & 7 deletions scripts/conversion_toolkits/convert_electra.py
@@ -53,7 +53,9 @@ def read_tf_checkpoint(path):
    return tensors


def get_dict_config(model_size, electra_dir):
def get_dict_config(model_size, electra_path):
    sys.path.append(electra_path)
    electra_dir = os.path.abspath(os.path.join(os.path.dirname(electra_path), os.path.pardir))
    sys.path.append(electra_dir)
    from electra.util.training_utils import get_bert_config
    from electra.configure_pretraining import PretrainingConfig
@@ -100,7 +102,7 @@ def convert_tf_config(config_dict, vocab_size):
    return cfg


def convert_tf_assets(tf_assets_dir, model_size, electra_dir):
def convert_tf_assets(tf_assets_dir, model_size, electra_path):
    """Convert the assets file including config, vocab and tokenizer model"""
    file_names = os.listdir(tf_assets_dir)
    vocab_path = None
@@ -113,7 +115,7 @@ def convert_tf_assets(tf_assets_dir, model_size, electra_dir):
    if vocab_path:
        vocab_path = os.path.join(tf_assets_dir, vocab_path)
        vocab_size = len(open(vocab_path, 'rU').readlines())
    config_dict = get_dict_config(model_size, electra_dir)
    config_dict = get_dict_config(model_size, electra_path)
    cfg = convert_tf_config(config_dict, vocab_size)
    return cfg, vocab_path

@@ -190,12 +192,12 @@ def get_name_map(tf_names, convert_type='backbone'):
    return name_map


def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, electra_dir):
def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, electra_path):
    ctx = mx.gpu(gpu) if gpu is not None else mx.cpu()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    cfg, vocab_path = convert_tf_assets(model_dir, model_size, electra_dir)
    cfg, vocab_path = convert_tf_assets(model_dir, model_size, electra_path)
    with open(os.path.join(save_dir, 'model.yml'), 'w') as of:
        of.write(cfg.dump())
    new_vocab = HuggingFaceWordPieceTokenizer(
@@ -234,6 +236,8 @@ def convert_tf_model(model_dir, save_dir, test_conversion, model_size, gpu, elec
    tf_names = list(tf_names)

    # reload the electra module for this local scope
    sys.path.append(electra_path)
    electra_dir = os.path.abspath(os.path.join(os.path.dirname(electra_path), os.path.pardir))
    sys.path.append(electra_dir)
    from electra.util.training_utils import get_bert_config
    from electra.configure_pretraining import PretrainingConfig
@@ -426,11 +430,10 @@ def convert_qkv_weights(tf_prefix, mx_prefix):
    logging_config()
    save_dir = args.save_dir if args.save_dir is not None else os.path.basename(
        args.tf_model_path) + '_gluon'
    electra_dir = os.path.abspath(os.path.join(os.path.dirname(args.electra_path), os.path.pardir))
    convert_tf_model(
        args.tf_model_path,
        save_dir,
        args.test,
        args.model_size,
        args.gpu,
        electra_dir)
        args.electra_path)