You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
File "/gpt-neox/train.py", line 34, in <module>
main()
File "/gpt-neox/train.py", line 30, in main
pretrain(neox_args=neox_args)
File "/gpt-neox/megatron/training.py", line 228, in pretrain
iteration = train(
File "/gpt-neox/megatron/training.py", line 913, in train
loss_dict, skipped_iter = train_step(
File "/gpt-neox/megatron/training.py", line 793, in train_step
loss = forward_step(
File "/gpt-neox/megatron/training.py", line 391, in forward_step
maybe_tuple = model((tokens, position_ids, attention_mask), neox_args=neox_args)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/deepspeed/utils/nvtx.py", line 15, in wrapped_fn
ret_val = func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/deepspeed/runtime/engine.py", line 1822, in forward
loss = self.module(*inputs, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/gpt-neox/megatron/model/utils.py", line 190, in forward
x = func(forward_input)
File "/gpt-neox/megatron/model/utils.py", line 181, in exec_func
inputs = layer(inputs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/gpt-neox/megatron/model/transformer.py", line 1167, in forward
output, moe_loss = super().forward(hidden_states, attention_mask)
File "/gpt-neox/megatron/model/transformer.py", line 1155, in forward
return output, moe_loss
Running pythia 14M on master:
The `moe_loss` variable is not defined inside the `forward()` for `ParallelTransformerLayer`, in the `gpt-j-residual==true` branch. The variable was introduced when #1129 was merged. I am not too familiar with MoE; maybe @yang can comment on this?
The text was updated successfully, but these errors were encountered: