Changes to make phi-3 tuning work
ErikTromp committed May 7, 2024
1 parent eacaf86 commit 1426045
Showing 4 changed files with 13 additions and 10 deletions.
7 changes: 4 additions & 3 deletions finetune.py
@@ -111,7 +111,7 @@ def main():
 
     if noquant:
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0}, trust_remote_code=True)
     elif quant8:
         quant_config = BitsAndBytesConfig(
             load_in_8bit=True,
@@ -120,7 +120,7 @@ def main():
             bnb_8bit_use_double_quant=False
         )
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0}, trust_remote_code=True)
     else:
         # Set up quantization config
         quant_config = BitsAndBytesConfig(
@@ -130,7 +130,7 @@ def main():
             bnb_4bit_use_double_quant=True,
         )
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0}, trust_remote_code=True)
 
     model.config.use_cache = False
     model.config.pretraining_tp = 1
@@ -150,6 +150,7 @@ def main():
         r=64,
         bias="none",
         task_type="CAUSAL_LM",
+        target_modules='all-linear',
     )
 
     # Pass quant and lora to trainer
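For context, the two fixes in this script amount to (a) passing trust_remote_code=True so transformers can execute the custom modeling code that some phi-3 checkpoints ship on the Hub, and (b) setting target_modules='all-linear' so peft locates the linear layers itself, presumably because peft's built-in per-architecture defaults do not cover phi-3. A minimal sketch of the patched 4-bit path, with an assumed phi-3 checkpoint id and illustrative LoRA hyperparameters (not the script's exact defaults); in the script itself the quant and LoRA configs are then handed to the trainer rather than applied here:

# Minimal sketch of the patched 4-bit loading path; model id and hyperparameters are illustrative.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig

base_model = "microsoft/Phi-3-mini-4k-instruct"  # assumed phi-3 checkpoint

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # illustrative; the script defines its own values
    bnb_4bit_compute_dtype=torch.bfloat16,  # illustrative
    bnb_4bit_use_double_quant=True,
)

# trust_remote_code=True lets transformers run the custom phi-3 modeling code from the Hub.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"": 0},
    trust_remote_code=True,
)
model.config.use_cache = False

# "all-linear" tells peft to wrap every linear layer instead of a fixed module-name list.
peft_config = LoraConfig(
    lora_alpha=16,     # illustrative
    lora_dropout=0.1,  # illustrative
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
)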
7 changes: 4 additions & 3 deletions finetune_dpo.py
@@ -113,7 +113,7 @@ def main():
 
     if noquant:
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0}, trust_remote_code=True)
     elif quant8:
         quant_config = BitsAndBytesConfig(
             load_in_8bit=True,
@@ -122,7 +122,7 @@ def main():
             bnb_8bit_use_double_quant=False
         )
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0}, trust_remote_code=True)
     else:
         # Set up quantization config
         quant_config = BitsAndBytesConfig(
@@ -132,7 +132,7 @@ def main():
             bnb_4bit_use_double_quant=True,
         )
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0}, trust_remote_code=True)
 
     model.config.use_cache = False
     model.config.pretraining_tp = 1
@@ -154,6 +154,7 @@ def main():
         r=64,
         bias="none",
         task_type="CAUSAL_LM",
+        target_modules='all-linear',
     )
 
     # Pass quant and lora to trainer
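finetune_dpo.py receives the identical loading and LoRA changes. For orientation only, this is roughly how such a LoRA config gets attached during DPO training with trl 0.8.x; it reuses base_model, model, and peft_config from the sketch above, and the tokenizer, dataset, and training arguments here are placeholders rather than the script's actual values:

# Hedged sketch of trl-0.8.x DPO wiring; the repository's own trainer setup may differ.
from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments
from trl import DPOTrainer

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # placeholder padding choice

# Tiny placeholder preference dataset with the prompt/chosen/rejected columns DPO expects.
train_dataset = Dataset.from_dict({
    "prompt": ["Say hello."],
    "chosen": ["Hello!"],
    "rejected": ["Go away."],
})

training_args = TrainingArguments(output_dir="dpo-out", per_device_train_batch_size=1)

trainer = DPOTrainer(
    model=model,              # quantized phi-3 loaded with trust_remote_code=True
    ref_model=None,           # with a peft_config, trl derives the reference policy from the base weights
    args=training_args,
    beta=0.1,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,  # LoraConfig with target_modules="all-linear"
)
trainer.train()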
7 changes: 4 additions & 3 deletions finetune_orpo.py
@@ -111,7 +111,7 @@ def main():
 
     if noquant:
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0}, trust_remote_code=True)
     elif quant8:
         quant_config = BitsAndBytesConfig(
             load_in_8bit=True,
@@ -120,7 +120,7 @@ def main():
             bnb_8bit_use_double_quant=False
         )
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0}, trust_remote_code=True)
     else:
         # Set up quantization config
         quant_config = BitsAndBytesConfig(
@@ -130,7 +130,7 @@ def main():
             bnb_4bit_use_double_quant=True,
         )
         # Load base model
-        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0})
+        model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=quant_config, device_map={"": 0}, trust_remote_code=True)
 
     model.config.use_cache = False
     model.config.pretraining_tp = 1
@@ -162,6 +162,7 @@ def main():
         report_to=None,
         remove_unused_columns=False,
         beta=0.1, # the lambda/alpha hyperparameter in the paper/code
+        target_modules='all-linear',
     )
 
     trainer = ORPOTrainer(
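finetune_orpo.py picks up the same trust_remote_code change; here the target_modules line lands in the config block that also carries the ORPO beta. As a rough reference (not the script's exact code), the usual trl 0.8.x pattern keeps the LoRA settings in a separate peft_config handed to ORPOTrainer, which needs no reference model because the odds-ratio penalty is computed from the policy alone. The sketch below reuses model, tokenizer, train_dataset, and peft_config from the sketches above:

# Hedged sketch of trl-0.8.x ORPO wiring; the script may organize this differently.
from trl import ORPOConfig, ORPOTrainer

orpo_args = ORPOConfig(
    output_dir="orpo-out",            # placeholder
    per_device_train_batch_size=1,    # placeholder
    report_to=None,
    remove_unused_columns=False,
    beta=0.1,  # the lambda/alpha hyperparameter in the ORPO paper
)

trainer = ORPOTrainer(
    model=model,                  # base model loaded with trust_remote_code=True
    args=orpo_args,
    train_dataset=train_dataset,  # same prompt/chosen/rejected format as the DPO sketch
    tokenizer=tokenizer,
    peft_config=peft_config,      # LoraConfig with target_modules="all-linear"
)
trainer.train()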
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
 accelerate==0.25.0
-peft==0.6.2
+peft==0.10.0
 bitsandbytes==0.41.2.post2
 transformers==4.36.2
 trl==0.8.2
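The peft bump from 0.6.2 to 0.10.0 presumably exists because the target_modules='all-linear' shortcut is not recognized by peft 0.6.2. A quick sanity check (assumed workflow, not part of the repository) that the pinned stack wraps phi-3's linear layers:

# Assumed sanity check: verify the installed peft resolves "all-linear" against a phi-3 model.
import peft
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

print(peft.__version__)  # expect 0.10.0 per requirements.txt

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",  # illustrative phi-3 checkpoint id
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
lora = LoraConfig(r=64, target_modules="all-linear", task_type="CAUSAL_LM")
peft_model = get_peft_model(model, lora)
peft_model.print_trainable_parameters()  # reports how many linear layers received LoRA adapters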
