
Commit d5a222e: Dev update (Oneflow-Inc#472)
* update doc in projects
* rename: model_utils -> model_loader
* update comment in preprocess_data.py
* add conv1d layer to libai
* refine
* refine
* add dist infer gpt
* refine model test and update Features.md
* refine train_net.py
* refine models datasets
* refine
* Recommended mmap data preprocess method
* reformat
* remove whitespace
* modify according to comments

1 parent: da6dcf6


52 files changed: +417, -41

configs/bert_large_pretrain.py (+2)

@@ -12,6 +12,8 @@
 tokenization.tokenizer.vocab_file = vocab_file
 dataloader.train.dataset[0].data_prefix = data_prefix
 dataloader.train.dataset[0].indexed_dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.indexed_dataset.data_prefix = data_prefix

 # Bert-large model config
 model.cfg.num_attention_heads = 16

configs/common/data/bert_dataset.py (+22)

@@ -42,3 +42,25 @@
     weights=[1.0],
     num_workers=4,
 )
+
+dataloader.test = [
+    LazyCall(build_nlp_test_loader)(
+        dataset=LazyCall(BertDataset)(
+            name="bert",
+            data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+            indexed_dataset=LazyCall(get_indexed_dataset)(
+                data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+                data_impl="mmap",
+                skip_warmup=False,
+            ),
+            max_num_samples=10,
+            max_seq_length=512,
+            mask_lm_prob=0.15,
+            short_seq_prob=0.1,
+            binary_head=True,
+            seed=1234,
+            masking_style="bert-cn-wwm",
+        ),
+        test_batch_size=4,
+    )
+]

configs/common/data/gpt_dataset.py (+18)

@@ -39,3 +39,21 @@
     weights=[1.0],
     num_workers=4,
 )
+
+dataloader.test = [
+    LazyCall(build_nlp_test_loader)(
+        dataset=LazyCall(GPT2Dataset)(
+            name="gpt-2",
+            data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+            indexed_dataset=LazyCall(get_indexed_dataset)(
+                data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+                data_impl="mmap",
+                skip_warmup=False,
+            ),
+            max_seq_length=1024,
+            max_num_samples=10,
+            seed=1234,
+        ),
+        test_batch_size=4,
+    )
+]

configs/common/data/roberta_dataset.py (+22, -1)

@@ -27,7 +27,7 @@
             data_impl="mmap",
             skip_warmup=False,
         ),
-        max_seq_length=512,
+        max_seq_length=514,
         mask_lm_prob=0.15,
         short_seq_prob=0.0,
         seed=1234,
@@ -39,3 +39,24 @@
     weights=[1.0],
     num_workers=4,
 )
+
+dataloader.test = [
+    LazyCall(build_nlp_test_loader)(
+        dataset=LazyCall(RobertaDataset)(
+            name="roberta",
+            data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+            indexed_dataset=LazyCall(get_indexed_dataset)(
+                data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+                data_impl="mmap",
+                skip_warmup=False,
+            ),
+            max_num_samples=10,
+            max_seq_length=514,
+            mask_lm_prob=0.15,
+            short_seq_prob=0.1,
+            seed=1234,
+            masking_style="bert",
+        ),
+        test_batch_size=4,
+    )
+]

configs/common/data/t5_dataset.py (+22, -1)

@@ -33,7 +33,7 @@
         name="t5",
         data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
         indexed_dataset=LazyCall(get_indexed_dataset)(
-            data_prefix="/workspace/data/libai_dataset/" "/loss_compara_content_sentence",
+            data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
             data_impl="mmap",
             skip_warmup=False,
         ),
@@ -49,3 +49,24 @@
     weights=[1.0],
     num_workers=4,
 )
+
+dataloader.test = [
+    LazyCall(build_nlp_test_loader)(
+        dataset=LazyCall(T5Dataset)(
+            name="t5",
+            data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+            indexed_dataset=LazyCall(get_indexed_dataset)(
+                data_prefix="/workspace/data/libai_dataset/loss_compara_content_sentence",
+                data_impl="mmap",
+                skip_warmup=False,
+            ),
+            max_num_samples=10,
+            max_seq_length=512,
+            max_seq_length_dec=128,
+            masked_lm_prob=0.15,
+            short_seq_prob=0.1,
+            seed=1234,
+        ),
+        test_batch_size=4,
+    )
+]

configs/gpt2_pretrain.py (+2)

@@ -15,6 +15,8 @@
 tokenization.tokenizer.merges_file = merge_files
 dataloader.train.dataset[0].data_prefix = data_prefix
 dataloader.train.dataset[0].indexed_dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.indexed_dataset.data_prefix = data_prefix

 # GPT-2 model config
 model.cfg.embedding_dropout_prob = 0.1

configs/roberta_pretrain.py (+2)

@@ -15,6 +15,8 @@
 tokenization.tokenizer.merges_file = merge_files
 dataloader.train.dataset[0].data_prefix = data_prefix
 dataloader.train.dataset[0].indexed_dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.indexed_dataset.data_prefix = data_prefix

 # RoBERTa model config
 model.cfg.num_attention_heads = 12

configs/t5_large_pretrain.py (+2)

@@ -13,6 +13,8 @@
 tokenization.tokenizer.vocab_file = vocab_file
 dataloader.train.dataset[0].data_prefix = data_prefix
 dataloader.train.dataset[0].indexed_dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.data_prefix = data_prefix
+dataloader.test[0].dataset.indexed_dataset.data_prefix = data_prefix

 # T5-large model config
 model.cfg.num_attention_heads = 12

dev/model_loader_test.sh (+8, -8)

@@ -7,20 +7,20 @@ export TEST_OUTPUT=output_unittest
 export ONEFLOW_TEST_DEVICE_NUM=4
 export ONEFLOW_EP_CUDA_ENABLE_TF32_EXECUTION=0

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_bert_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_bert_loader.py

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_roberta_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_roberta_loader.py

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_gpt_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_gpt_loader.py

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_mt5_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_mt5_loader.py

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_t5_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_t5_loader.py

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_swin_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_swin_loader.py

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_swinv2_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_swinv2_loader.py

-python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_utils/test_vit_loader.py
+python3 -m oneflow.distributed.launch --nproc_per_node 4 -m pytest -s --disable-warnings tests/model_loader/test_vit_loader.py

 rm -rf $TEST_OUTPUT

docs/source/notes/How_to_load_huggingface's_pretrained_model_in_libai.md (+2, -2)

@@ -56,7 +56,7 @@ bert = loader.load()
 # Use Custom ModelLoader

 ## Model Loader for HuggerFace
-If you want to define your own HuggerFace's model loader, you can inherit the base `ModelLoaderHuggerFace` in `libai.models.utils.model_utils.base_loader`.
+If you want to define your own HuggerFace's model loader, you can inherit the base `ModelLoaderHuggerFace` in `libai.models.utils.model_loader.base_loader`.

 Then you need to overwrite the `_convert_state_dict` and `_load_config_from_json` method to load HuggingFace's pretrained model in LiBai.

@@ -99,7 +99,7 @@ class ToyModelLoaderHuggerFace(ModelLoaderHuggerFace):
 ```

 ## Model Loader for LiBai
-If you want to define your own LiBai's model loader, you can inherit the base `ModelLoaderLiBai` class in `libai.models.utils.model_utils.base_loader`.
+If you want to define your own LiBai's model loader, you can inherit the base `ModelLoaderLiBai` class in `libai.models.utils.model_loader.base_loader`.

 You just need to set `base_model_prefix_2` argument to load LiBai's pretrained model.
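
Since the loaders now live under `model_loader`, a custom HuggingFace loader subclasses the base class from the new path. Below is a minimal sketch: the base class, its module path, and the two overridden methods come from the docs above, while `ToyModelLoaderHuggerFace`, the method signatures, the `libai_cfg` field, and the key mapping are illustrative assumptions, not LiBai's actual API.

```python
import json

from libai.models.utils.model_loader.base_loader import ModelLoaderHuggerFace


class ToyModelLoaderHuggerFace(ModelLoaderHuggerFace):
    def __init__(self, model, libai_cfg, pretrained_model_path, **kwargs):
        super().__init__(model, libai_cfg, pretrained_model_path, **kwargs)
        self.base_model_prefix_1 = "toy"  # prefix in the HuggingFace checkpoint (hypothetical)
        self.base_model_prefix_2 = "toy"  # prefix expected by the LiBai model (hypothetical)

    def _convert_state_dict(self, flow_state_dict, cfg):
        # Rename checkpoint keys into LiBai's naming scheme (hypothetical mapping).
        oneflow_state_dict = flow_state_dict.copy()
        for key in list(oneflow_state_dict.keys()):
            if "dense" in key:
                oneflow_state_dict[key.replace("dense", "linear")] = oneflow_state_dict.pop(key)
        return oneflow_state_dict

    def _load_config_from_json(self, config_file):
        # Copy fields of HuggingFace's config.json into the LiBai model config.
        with open(config_file, mode="r", encoding="utf-8") as f:
            cfg_dict = json.load(f)
        self.libai_cfg.hidden_size = cfg_dict["hidden_size"]  # hypothetical field
```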

docs/source/tutorials/basics/Features.md (-2)

@@ -112,8 +112,6 @@ Unlike normal data parallelism, where model states and gradients are replicated

 - Level 2: The reduced 32-bit gradients for updating the model weights are also partitioned so that each process retains only the gradients corresponding to its portion of the optimizer states.

-> **Note:** ZeRO only supports data parallel and pipeline parallel, or the combination of them. If you use tensor parallel in your training, make sure ZeRO is disabled.
-
 ### Usage

 ```python

docs/source/tutorials/basics/Preprocessing_Dataset.md (+1, -1)

@@ -18,7 +18,7 @@
 ```bash
 #!/bin/bash

-IMPL=lazy
+IMPL=mmap
 KEYS=text

 python tools/preprocess_data.py \
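
The switch from `IMPL=lazy` to `IMPL=mmap` matches the mmap-based `get_indexed_dataset` calls used throughout the dataset configs above. A rough sketch of reading the preprocessed output back, assuming the import path used by those configs (the prefix below is a placeholder):

```python
# Assumed import path; the dataset configs above invoke get_indexed_dataset via LazyCall.
from libai.data.data_utils import get_indexed_dataset

ds = get_indexed_dataset(
    data_prefix="/path/to/output_prefix",  # placeholder: prefix of the .bin/.idx pair
    data_impl="mmap",                      # must match the IMPL used at preprocessing time
    skip_warmup=False,
)
print(len(ds))  # number of indexed documents/sentences
```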

libai/inference/basic.py (+1, -1)

@@ -123,7 +123,7 @@ def load_pretrain_weight(
         set it to `random` for quickly debugging by random initialized model
     """
     if mode == "libai":
-        from libai.models.utils.model_utils.base_loader import ModelLoaderLiBai
+        from libai.models.utils.model_loader.base_loader import ModelLoaderLiBai

         model_loader = ModelLoaderLiBai(libai_cfg_model, libai_cfg_model.cfg, model_path)
        model_loader.base_model_prefix_1 = None
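
For reference, the same loaders can be driven directly from user code through the re-exported classes in `libai.models.utils` (see the `__init__.py` diff below); this mirrors the docstring example in `base_loader.py`, with a placeholder checkpoint path:

```python
import libai
from libai.config.configs.common.models.bert import cfg
from libai.models.utils import BertLoaderLiBai

loader = BertLoaderLiBai(
    libai.models.BertModel,
    cfg,
    "/path/to/libai_bert_checkpoint",  # placeholder path
)
bert = loader.load()
```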

libai/layers/__init__.py (+2)

@@ -18,6 +18,7 @@
 from .embedding import Embedding, SinePositionalEmbedding, VocabEmbedding, PatchEmbedding
 from .layer_norm import LayerNorm, RMSLayerNorm
 from .linear import Linear, Linear1D
+from .conv import Conv1D
 from .lm_logits import LMLogits
 from .mlp import MLP
 from .transformer_layer import TransformerLayer
@@ -32,6 +33,7 @@
     "build_activation",
     "Linear",
     "Linear1D",
+    "Conv1D",
     "MLP",
     "LayerNorm",
     "RMSLayerNorm",

libai/layers/conv.py (+127, new file)

# coding=utf-8
# Copyright 2021 The OneFlow Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os

import oneflow as flow
from oneflow import nn

from libai.utils import distributed as dist


class Conv1D(nn.Module):
    def __init__(
        self,
        in_features,
        out_features,
        bias=True,
        parallel="data",
        init_method=nn.init.xavier_normal_,
        skip_bias_add=False,
        dtype=flow.float32,
        *,
        layer_idx=0,
    ):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.parallel = parallel
        self.skip_bias_add = skip_bias_add

        if parallel == "col":
            weight_sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(1)])
            bias_sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast])
        elif parallel == "row":
            weight_sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(0)])
            bias_sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(0)])
        elif parallel == "data":
            weight_sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast])
            bias_sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast])
        else:
            raise KeyError(f"{parallel} is not supported! Only support ('data', 'row' and 'col')")

        self.weight = flow.nn.Parameter(
            flow.empty(
                (in_features, out_features),
                dtype=dtype,
                placement=dist.get_layer_placement(layer_idx),  # for pipeline parallelism placement
                sbp=weight_sbp,
            )
        )
        if os.getenv("ONEFLOW_LINEAR_EMBEDDING_SKIP_INIT", "0") != "1":
            init_method(self.weight)

        self.bias = (
            flow.nn.Parameter(
                flow.zeros(
                    (out_features,),
                    dtype=dtype,
                    placement=dist.get_layer_placement(layer_idx),
                    sbp=bias_sbp,
                )
            )
            if bias
            else None
        )

    def forward(self, x):
        if dist.same_sbp(self.weight.sbp, dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(1)])):
            if self.weight.sbp[-1] == flow.sbp.split(1):
                x_sbp = x.sbp[:-1] + (flow.sbp.broadcast,)
                x = x.to_global(sbp=x_sbp)

            x = x.to_global(grad_sbp=x.sbp)
            x = flow.matmul(x, self.weight)

        elif dist.same_sbp(
            self.weight.sbp, dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(0)])
        ):
            if self.weight.sbp[-1] == flow.sbp.split(0):
                x_sbp = x.sbp[:-1] + (flow.sbp.split(x.ndim - 1),)
                x = x.to_global(sbp=x_sbp)
                out_sbp = x.sbp[:-1] + (flow.sbp.broadcast,)
            else:
                out_sbp = x.sbp

            x = flow.matmul(x, self.weight)
            x = x.to_global(sbp=out_sbp)

        elif dist.same_sbp(
            self.weight.sbp, dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast])
        ):
            x = x.to_global(grad_sbp=x.sbp)
            x = flow.matmul(x, self.weight)
        else:
            x = flow.matmul(x, self.weight)

        if self.bias is not None:
            if self.skip_bias_add:
                return x, self.bias
            else:
                return x + self.bias
        else:
            return x

    def extra_repr(self) -> str:
        return "in_features={}, out_features={}, bias={}, parallel={}".format(
            self.in_features,
            self.out_features,
            self.bias is not None,
            self.parallel,
        )
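
Despite the name, `Conv1D` follows the GPT-2 convention: it is a linear projection whose weight is stored as `(in_features, out_features)` and applied with `flow.matmul`, not a convolution. The `parallel` options mirror `Linear`: "col" splits the output dimension, "row" splits the input dimension, and the default "data" replicates the weight. A short usage sketch with illustrative sizes (running it for real requires an initialized distributed environment, so the forward call is left commented):

```python
import oneflow as flow
from libai.layers import Conv1D

# Column-parallel projection, e.g. a GPT-2-style MLP expansion (sizes are illustrative).
proj = Conv1D(in_features=768, out_features=3072, parallel="col", layer_idx=0)

# x is expected to be a global tensor of shape (batch, seq_len, in_features).
# y = proj(x)  # -> (batch, seq_len, 3072), or (out, bias) when skip_bias_add=True
```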

libai/models/utils/__init__.py (+7, -7)

@@ -15,10 +15,10 @@

 from .graph_base import GraphBase
 from .weight_init import init_method_normal, scaled_init_method_normal
-from .model_utils.base_loader import ModelLoaderHuggerFace, ModelLoaderLiBai
-from .model_utils.bert_loader import BertLoaderHuggerFace, BertLoaderLiBai
-from .model_utils.roberta_loader import RobertaLoaderHuggerFace, RobertaLoaderLiBai
-from .model_utils.gpt_loader import GPT2LoaderHuggerFace, GPT2LoaderLiBai
-from .model_utils.swin_loader import SwinLoaderHuggerFace, SwinLoaderLiBai
-from .model_utils.swinv2_loader import SwinV2LoaderHuggerFace, SwinV2LoaderLiBai
-from .model_utils.vit_loader import ViTLoaderHuggerFace, ViTLoaderLiBai
+from .model_loader.base_loader import ModelLoaderHuggerFace, ModelLoaderLiBai
+from .model_loader.bert_loader import BertLoaderHuggerFace, BertLoaderLiBai
+from .model_loader.roberta_loader import RobertaLoaderHuggerFace, RobertaLoaderLiBai
+from .model_loader.gpt_loader import GPT2LoaderHuggerFace, GPT2LoaderLiBai
+from .model_loader.swin_loader import SwinLoaderHuggerFace, SwinLoaderLiBai
+from .model_loader.swinv2_loader import SwinV2LoaderHuggerFace, SwinV2LoaderLiBai
+from .model_loader.vit_loader import ViTLoaderHuggerFace, ViTLoaderLiBai

libai/models/utils/model_utils/base_loader.py → libai/models/utils/model_loader/base_loader.py (renamed, +1, -1)

@@ -320,7 +320,7 @@ def load(self):

         >>> import libai
         >>> from libai.config.configs.common.models.bert import cfg
-        >>> from model_utils import BertLoaderLiBai
+        >>> from model_loader import BertLoaderLiBai

         >>> loder = BertLoaderLiBai(
                 libai.models.BertModel,
