fixes for codeql
HuiyingLi committed Dec 15, 2023
1 parent e7476e8 commit 9028555
Showing 2 changed files with 49 additions and 47 deletions.
@@ -69,35 +69,36 @@ def __init__(
            raise ImportError(
                "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
            )
        super().__init__(config=config, submodules=submodules, layer_number=layer_number)

        if hasattr(self.config, 'new_decoder_architecture'):
            self.new_decoder_architecture = self.config.new_decoder_architecture
        else:
            self.new_decoder_architecture = None
        if hasattr(self.config, 'parallel_attention'):
            self.parallel_attention = self.config.parallel_attention
        else:
            self.parallel_attention = None
        super().__init__(config=config, submodules=submodules, layer_number=layer_number)

        if self.new_decoder_architecture or self.parallel_attention:
            self.post_self_attn_layernorm = None
        else:
            self.post_self_attn_layernorm = build_module(
                submodules.post_self_attn_layernorm,
                config=self.config,
                hidden_size=self.config.hidden_size,
                eps=self.config.layernorm_epsilon,
            )
        if self.new_decoder_architecture:
            self.pre_mlp_layernorm = build_module(
                submodules.pre_mlp_layernorm,
                config=self.config,
                hidden_size=self.config.hidden_size,
                eps=self.config.layernorm_epsilon,
            )
        else:
            self.pre_mlp_layernorm = None
        if hasattr(self.config, 'new_decoder_architecture'):
            self.new_decoder_architecture = self.config.new_decoder_architecture
        else:
            self.new_decoder_architecture = None
        if hasattr(self.config, 'parallel_attention'):
            self.parallel_attention = self.config.parallel_attention
        else:
            self.parallel_attention = None

        if self.new_decoder_architecture or self.parallel_attention:
            self.post_self_attn_layernorm = None
        else:
            self.post_self_attn_layernorm = build_module(
                submodules.post_self_attn_layernorm,
                config=self.config,
                hidden_size=self.config.hidden_size,
                eps=self.config.layernorm_epsilon,
            )
        if self.new_decoder_architecture:
            self.pre_mlp_layernorm = build_module(
                submodules.pre_mlp_layernorm,
                config=self.config,
                hidden_size=self.config.hidden_size,
                eps=self.config.layernorm_epsilon,
            )
        else:
            self.pre_mlp_layernorm = None

    def forward(
        self,
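The __init__ hunk above reads Falcon-specific flags that may or may not be present on the transformer config and falls back to None when they are absent, then uses those flags to decide whether to build post_self_attn_layernorm and pre_mlp_layernorm. A minimal sketch of that fallback pattern follows; DemoConfig and read_falcon_flags are hypothetical stand-ins for illustration, not NeMo classes, and getattr with a default is shown only as an equivalent shorthand for the hasattr/else branches in the diff.

from dataclasses import dataclass


@dataclass
class DemoConfig:
    # Hypothetical stand-in for the transformer config object used in the hunk above.
    hidden_size: int = 4544
    layernorm_epsilon: float = 1e-5
    new_decoder_architecture: bool = True  # present only on some Falcon variants


def read_falcon_flags(config):
    # Mirrors the hasattr/else fallback in the diff: a missing Falcon-specific
    # flag resolves to None instead of raising AttributeError.
    if hasattr(config, 'new_decoder_architecture'):
        new_decoder_architecture = config.new_decoder_architecture
    else:
        new_decoder_architecture = None
    # getattr with a default expresses the same fallback in one line.
    parallel_attention = getattr(config, 'parallel_attention', None)
    return new_decoder_architecture, parallel_attention


print(read_falcon_flags(DemoConfig()))  # -> (True, None)

Depending on which of these flags resolve to truthy values, the layer then either skips or builds the two extra layernorms, as the rest of the hunk shows.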
@@ -45,24 +45,25 @@ def get_falcon_layer_spec() -> ModuleSpec:
        raise ImportError(
            "megatron-core was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )
    falcon_submodules = TransformerLayerSubmodules(
        input_layernorm=TENorm,
        self_attention=ModuleSpec(
            module=SelfAttention,
            params={"attn_mask_type": AttnMaskType.causal},
            submodules=SelfAttentionSubmodules(
                linear_qkv=TEColumnParallelLinear,
                core_attention=TEDotProductAttention,
                linear_proj=TERowParallelLinear,
    else:
        falcon_submodules = TransformerLayerSubmodules(
            input_layernorm=TENorm,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=TEColumnParallelLinear,
                    core_attention=TEDotProductAttention,
                    linear_proj=TERowParallelLinear,
                ),
            ),
        ),
        self_attn_bda=get_bias_dropout_add,
        pre_mlp_layernorm=TENorm,
        mlp=ModuleSpec(
            module=MLP, submodules=MLPSubmodules(linear_fc1=TEColumnParallelLinear, linear_fc2=TERowParallelLinear,),
        ),
        mlp_bda=get_bias_dropout_add,
    )
    # Old falcon(prior to 7b/40b/180b) uses post_self_attn_layernorm that is not included in TransformerLayerModules.
    falcon_submodules.post_self_attn_layernorm = TENorm
    return ModuleSpec(module=FalconTransformerLayer, submodules=falcon_submodules)
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=TENorm,
            mlp=ModuleSpec(
                module=MLP, submodules=MLPSubmodules(linear_fc1=TEColumnParallelLinear, linear_fc2=TERowParallelLinear,),
            ),
            mlp_bda=get_bias_dropout_add,
        )
        # Old falcon(prior to 7b/40b/180b) uses post_self_attn_layernorm that is not included in TransformerLayerModules.
        falcon_submodules.post_self_attn_layernorm = TENorm
        return ModuleSpec(module=FalconTransformerLayer, submodules=falcon_submodules)
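As the in-code comment above notes, older Falcon variants (prior to 7B/40B/180B) need a post-attention layernorm that TransformerLayerSubmodules does not declare, so the spec builder attaches post_self_attn_layernorm to falcon_submodules after construction. A minimal sketch of that attach-after-construction pattern follows; DemoSubmodules and DemoNorm are hypothetical placeholders, not megatron-core classes.

from dataclasses import dataclass
from typing import Any


class DemoNorm:
    # Hypothetical placeholder for a layernorm module class such as TENorm.
    pass


@dataclass
class DemoSubmodules:
    # Fields the stock container declares, by analogy with TransformerLayerSubmodules.
    input_layernorm: Any = None
    pre_mlp_layernorm: Any = None


def build_falcon_like_submodules() -> DemoSubmodules:
    submodules = DemoSubmodules(input_layernorm=DemoNorm, pre_mlp_layernorm=DemoNorm)
    # The extra layernorm needed by older Falcon checkpoints is not a declared field,
    # so it is attached as a plain attribute after construction, as in the diff above.
    submodules.post_self_attn_layernorm = DemoNorm
    return submodules


demo = build_falcon_like_submodules()
print(hasattr(demo, 'post_self_attn_layernorm'))  # -> True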
