-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopenelm.config
91 lines (90 loc) · 1.42 KB
/
openelm.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
OpenELMConfig {
"_name_or_path": "apple/OpenELM-270M",
"activation_fn_name": "swish",
"architectures": [
"OpenELMForCausalLM"
],
"auto_map": {
"AutoConfig": "apple/OpenELM-270M--configuration_openelm.OpenELMConfig",
"AutoModelForCausalLM": "apple/OpenELM-270M--modeling_openelm.OpenELMForCausalLM"
},
"bos_token_id": 1,
"eos_token_id": 2,
"ffn_dim_divisor": 256,
"ffn_multipliers": [
0.5,
0.73,
0.97,
1.2,
1.43,
1.67,
1.9,
2.13,
2.37,
2.6,
2.83,
3.07,
3.3,
3.53,
3.77,
4.0
],
"ffn_with_glu": true,
"head_dim": 64,
"initializer_range": 0.02,
"max_context_length": 2048,
"model_dim": 1280,
"model_type": "openelm",
"normalization_layer_name": "rms_norm",
"normalize_qk_projections": true,
"num_gqa_groups": 4,
"num_kv_heads": [
3,
3,
3,
3,
3,
4,
4,
4,
4,
4,
4,
4,
5,
5,
5,
5
],
"num_query_heads": [
12,
12,
12,
12,
12,
16,
16,
16,
16,
16,
16,
16,
20,
20,
20,
20
],
"num_transformer_layers": 16,
"qkv_multipliers": [
0.5,
1.0
],
"rope_freq_constant": 10000,
"rope_max_length": 4096,
"share_input_output_layers": true,
"torch_dtype": "float32",
"transformers_version": "4.40.1",
"use_cache": true,
"vocab_size": 32000
}
write model to openelm_270M.bin, the keys of model is 130