-
Notifications
You must be signed in to change notification settings - Fork 260
/
Copy pathllama-7b.yaml
67 lines (61 loc) · 1.11 KB
/
llama-7b.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Model
# Identifies the editing algorithm, the checkpoints it loads, and where the
# trained editor is archived. All paths are relative.
alg_name: 'SERAC'
archive: './results/models/SERAC/llama-2-7b'
# NOTE(review): index 1, not 0 - presumably an accelerator ordinal; confirm the
# target machine actually exposes a second device.
device: 1

# Base model and its tokenizer (both point at the same checkpoint directory).
model_name: './hugging_cache/llama-2-7b'
model_class: 'LlamaForCausalLM'
# Presumably the smaller auxiliary model used by the method - TODO confirm.
small_name: './hugging_cache/llama-160m'
tokenizer_class: 'LlamaTokenizer'
tokenizer_name: './hugging_cache/llama-2-7b'

# Presumably the classifier backbone (cls_*) - TODO confirm against the loader.
cls_name: './hugging_cache/distilbert-base-cased'
cls_class: 'AutoModel'

# Explicit empty list (a bare `inner_params:` would load as null instead).
inner_params: []
model_parallel: false
# Method
# Training / optimisation settings for the editor.
# Conventions used in this stanza:
#   * booleans are always lowercase `true` / `false`;
#   * floats always carry a mantissa dot (`1.0e-5`, `100.0`) so that YAML 1.1
#     loaders such as PyYAML resolve them as floats - a bare `1e-5` is loaded
#     as the *string* '1e-5' by those loaders.
alg: SERAC
lr: 1.0e-5
edit_lr: 1.0e-2
seed: 0
lr_lr: 0.0
cedit: 0.1
cloc: 1.0
cbase: 1.0
dropout: 0.0
final_eval: true
supervised: false
train_base: false
no_grad_layers: null
soft_weighting: false
checkpoint_grad: false
cross_attend: false
cos: false
freeze: null
square: true
bound_embeds: false
use_all_negatives: false
freeze_cntr: false
dist_heads: 1
lora: null
batch_size: 1
model_save_pt: 500
edit_bs: 1
silent: false
# max_epochs: 1
max_iters: 10000
log_interval: 500
val_interval: 500
# NOTE(review): patience (40000) is larger than max_iters (10000); presumably
# this is meant to keep early stopping from ever triggering - confirm.
early_stop_patience: 40000
early_stop_key: "loss/total_edit_val"
eval_only: false
half: false
save: false
debug: false
log_errors: false
unlikelihood: true
val_batch_size: 1
accumulate_bs: 10
val_steps: 500
opt: Adam
grad_clip: 100.0
# Output
# Relative directory for run results; the `archive` path in the Model stanza
# lives underneath it.
results_dir: './results'