Commit 7f5bf66

Add hf access_token args
1 parent 40f43ea commit 7f5bf66

File tree

README.md (+1 −1)
optimize_rotation.py (+5 −1)
ptq.py (+5 −1)
scripts/32_eval_ptq_executorch.sh (+1 −1)
utils/process_args.py (+4 −0)

5 files changed: +16 −4 lines changed

README.md

+1 −1

@@ -37,7 +37,7 @@ If you find our code useful for your research, please consider citing:
 pip install .
 
 ### 2. Steps to run:
-For the scripts here, set `output_rotation_path` `output_dir` `logging_dir` `optimized_rotation_path` to your own locations.
+For the scripts here, set `output_rotation_path` `output_dir` `logging_dir` `optimized_rotation_path` to your own locations. For gated repo such as meta-llama, you can set your HF token to `access_token`.
 
 Step 1: Optimize Rotation Matrix
 * For LLaMA-2 7B/13B and LLaMA-3 8B models:
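
The new `access_token` option lets the scripts load gated repos such as meta-llama. As a quick sanity check that a token actually unlocks a gated model before launching the longer pipeline, something like the following works; the repo id and token value are placeholders, not part of this commit:

```python
# Hypothetical sanity check, not from this repo: confirm an HF access token can
# read a gated model's config before passing it as access_token to the scripts.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder gated repo id
    token="hf_xxx",              # placeholder HF access token
)
print(config.model_type)  # prints "llama" when the token is accepted
```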

optimize_rotation.py

+5 −1

@@ -47,7 +47,9 @@ def train() -> None:
     log.info("the rank is {}".format(local_rank))
     torch.distributed.barrier()
 
-    config = transformers.AutoConfig.from_pretrained(model_args.input_model)
+    config = transformers.AutoConfig.from_pretrained(
+        model_args.input_model, token=model_args.access_token
+    )
 
     # Llama v3.2 specific: Spinquant is not compatiable with tie_word_embeddings, clone lm_head from embed_tokens
     process_word_embeddings = False
@@ -59,6 +61,7 @@ def train() -> None:
         pretrained_model_name_or_path=model_args.input_model,
         config=config,
         torch_dtype=dtype,
+        token=model_args.access_token,
     )
     if process_word_embeddings:
         model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()
@@ -85,6 +88,7 @@ def train() -> None:
         use_fast=True,
         add_eos_token=False,
         add_bos_token=False,
+        token=model_args.access_token,
     )
     log.info("Complete tokenizer loading...")
     model.config.use_cache = False
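
The pattern in this file is the same for all three loads: the token from `model_args.access_token` is forwarded to each `from_pretrained` call. A minimal standalone sketch of that pattern, assuming the generic `AutoModelForCausalLM`/`AutoTokenizer` classes and an illustrative helper name (the repo may instantiate a specific Llama class instead):

```python
# Minimal sketch of the pattern applied above: pass the HF access token to the
# config, model, and tokenizer loaders. The class choices and helper name are
# assumptions, not copied from this repo.
from typing import Optional

import torch
import transformers


def load_for_rotation_training(
    input_model: str,
    access_token: Optional[str] = None,
    dtype: torch.dtype = torch.bfloat16,
):
    config = transformers.AutoConfig.from_pretrained(input_model, token=access_token)
    model = transformers.AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=input_model,
        config=config,
        torch_dtype=dtype,
        token=access_token,
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        input_model,
        use_fast=True,
        add_eos_token=False,
        add_bos_token=False,
        token=access_token,
    )
    return model, tokenizer
```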

ptq.py

+5 −1

@@ -28,7 +28,9 @@ def train() -> None:
     log.info("the rank is {}".format(local_rank))
     torch.distributed.barrier()
 
-    config = transformers.AutoConfig.from_pretrained(model_args.input_model)
+    config = transformers.AutoConfig.from_pretrained(
+        model_args.input_model, token=model_args.access_token
+    )
     # Llama v3.2 specific: Spinquant is not compatiable with tie_word_embeddings, clone lm_head from embed_tokens
     process_word_embeddings = False
     if config.tie_word_embeddings:
@@ -39,6 +41,7 @@ def train() -> None:
         pretrained_model_name_or_path=model_args.input_model,
         config=config,
         torch_dtype=dtype,
+        token=model_args.access_token,
     )
     if process_word_embeddings:
         model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()
@@ -57,6 +60,7 @@ def train() -> None:
         use_fast=True,
         add_eos_token=False,
         add_bos_token=False,
+        token=model_args.access_token,
     )
     log.info("Complete tokenizer loading...")
     model.config.use_cache = False
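
The context lines in both Python files also show the Llama 3.2 specific workaround: SpinQuant is not compatible with `tie_word_embeddings`, so `lm_head` gets its own copy of the embedding weights after loading. A rough sketch of that step; untying `config.tie_word_embeddings` before instantiation is an assumption about code not shown in this diff:

```python
# Rough sketch of the tie_word_embeddings handling visible in the context lines.
# Only the clone after loading appears verbatim in the diff; the rest is assumed.
from typing import Optional

import transformers


def load_untied(input_model: str, access_token: Optional[str] = None):
    config = transformers.AutoConfig.from_pretrained(input_model, token=access_token)
    process_word_embeddings = False
    if config.tie_word_embeddings:
        config.tie_word_embeddings = False  # assumed: untie before building the model
        process_word_embeddings = True
    model = transformers.AutoModelForCausalLM.from_pretrained(
        input_model, config=config, token=access_token
    )
    if process_word_embeddings:
        # Give lm_head an independent copy of the embedding weights, since
        # SpinQuant cannot work with tied embeddings (per the comment above).
        model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()
    return model
```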

scripts/32_eval_ptq_executorch.sh

+1 −1

@@ -21,5 +21,5 @@ torchrun --nnodes=1 --nproc_per_node=1 ptq.py \
     --a_asym \
     --rotate \
     --optimized_rotation_path "your_path/R.bin" \
-    --save_qmodel_path "./your_output_model_path/executorch_model.bin" \
+    --save_qmodel_path "./your_output_model_path/consolidated.00.pth" \
     --export_to_et

utils/process_args.py

+4 −0

@@ -26,6 +26,10 @@ class ModelArguments:
     optimized_rotation_path: Optional[str] = field(
         default=None, metadata={"help": "Optimized rotation checkpoint path"}
     )
+    access_token: Optional[str] = field(
+        default=None,
+        metadata={"help": "Huggingface access token to access gated repo like Llama"},
+    )
 
 
 @dataclass
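
The new field follows the same `dataclasses.field` pattern as the existing arguments. Assuming `ModelArguments` is parsed with `transformers.HfArgumentParser` (the parsing code is not shown in this diff), the field would surface on the command line as `--access_token`; a self-contained sketch with a trimmed-down dataclass:

```python
# Sketch only: a trimmed ModelArguments plus the new field, parsed the way
# transformers.HfArgumentParser would expose it. Whether the repo actually uses
# HfArgumentParser, and the input_model help text, are assumptions.
from dataclasses import dataclass, field
from typing import Optional

import transformers


@dataclass
class ModelArguments:
    input_model: Optional[str] = field(
        default=None, metadata={"help": "Model path or HF repo id"}
    )
    access_token: Optional[str] = field(
        default=None,
        metadata={"help": "Huggingface access token to access gated repo like Llama"},
    )


parser = transformers.HfArgumentParser(ModelArguments)
(model_args,) = parser.parse_args_into_dataclasses(
    ["--input_model", "meta-llama/Llama-2-7b-hf", "--access_token", "hf_xxx"]
)
print(model_args.access_token)  # -> hf_xxx
```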
