Commit 7f5bf66

Add hf access_token args
1 parent 40f43ea commit 7f5bf66

File tree

README.md (+1 −1)
optimize_rotation.py (+5 −1)
ptq.py (+5 −1)
scripts/32_eval_ptq_executorch.sh (+1 −1)
utils/process_args.py (+4 −0)

5 files changed: +16 −4 lines changed

README.md

+1 −1

@@ -37,7 +37,7 @@ If you find our code useful for your research, please consider citing:
 pip install .
 
 ### 2. Steps to run:
-For the scripts here, set `output_rotation_path` `output_dir` `logging_dir` `optimized_rotation_path` to your own locations.
+For the scripts here, set `output_rotation_path` `output_dir` `logging_dir` `optimized_rotation_path` to your own locations. For gated repo such as meta-llama, you can set your HF token to `access_token`.
 
 Step 1: Optimize Rotation Matrix
 * For LLaMA-2 7B/13B and LLaMA-3 8B models:
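
The new `access_token` option lets the scripts load gated repos such as meta-llama. As a quick sanity check that a token actually unlocks a gated model before launching the longer pipeline, something like the following works; the repo id and token value are placeholders, not part of this commit:

```python
# Hypothetical sanity check, not from this repo: confirm an HF access token can
# read a gated model's config before passing it as access_token to the scripts.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder gated repo id
    token="hf_xxx",              # placeholder HF access token
)
print(config.model_type)  # prints "llama" when the token is accepted
```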

optimize_rotation.py

+5 −1

@@ -47,7 +47,9 @@ def train() -> None:
     log.info("the rank is {}".format(local_rank))
     torch.distributed.barrier()
 
-    config = transformers.AutoConfig.from_pretrained(model_args.input_model)
+    config = transformers.AutoConfig.from_pretrained(
+        model_args.input_model, token=model_args.access_token
+    )
 
     # Llama v3.2 specific: Spinquant is not compatiable with tie_word_embeddings, clone lm_head from embed_tokens
     process_word_embeddings = False
@@ -59,6 +61,7 @@ def train() -> None:
         pretrained_model_name_or_path=model_args.input_model,
         config=config,
         torch_dtype=dtype,
+        token=model_args.access_token,
     )
     if process_word_embeddings:
         model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()
@@ -85,6 +88,7 @@ def train() -> None:
         use_fast=True,
         add_eos_token=False,
         add_bos_token=False,
+        token=model_args.access_token,
     )
     log.info("Complete tokenizer loading...")
     model.config.use_cache = False
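
The pattern in this file is the same for all three loads: the token from `model_args.access_token` is forwarded to each `from_pretrained` call. A minimal standalone sketch of that pattern, assuming the generic `AutoModelForCausalLM`/`AutoTokenizer` classes and an illustrative helper name (the repo may instantiate a specific Llama class instead):

```python
# Minimal sketch of the pattern applied above: pass the HF access token to the
# config, model, and tokenizer loaders. The class choices and helper name are
# assumptions, not copied from this repo.
from typing import Optional

import torch
import transformers


def load_for_rotation_training(
    input_model: str,
    access_token: Optional[str] = None,
    dtype: torch.dtype = torch.bfloat16,
):
    config = transformers.AutoConfig.from_pretrained(input_model, token=access_token)
    model = transformers.AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=input_model,
        config=config,
        torch_dtype=dtype,
        token=access_token,
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        input_model,
        use_fast=True,
        add_eos_token=False,
        add_bos_token=False,
        token=access_token,
    )
    return model, tokenizer
```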

ptq.py

+5 −1

@@ -28,7 +28,9 @@ def train() -> None:
     log.info("the rank is {}".format(local_rank))
     torch.distributed.barrier()
 
-    config = transformers.AutoConfig.from_pretrained(model_args.input_model)
+    config = transformers.AutoConfig.from_pretrained(
+        model_args.input_model, token=model_args.access_token
+    )
     # Llama v3.2 specific: Spinquant is not compatiable with tie_word_embeddings, clone lm_head from embed_tokens
     process_word_embeddings = False
     if config.tie_word_embeddings:
@@ -39,6 +41,7 @@ def train() -> None:
         pretrained_model_name_or_path=model_args.input_model,
         config=config,
         torch_dtype=dtype,
+        token=model_args.access_token,
     )
     if process_word_embeddings:
         model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()
@@ -57,6 +60,7 @@ def train() -> None:
         use_fast=True,
         add_eos_token=False,
         add_bos_token=False,
+        token=model_args.access_token,
     )
     log.info("Complete tokenizer loading...")
     model.config.use_cache = False
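
The context lines in both Python files also show the Llama 3.2 specific workaround: SpinQuant is not compatible with `tie_word_embeddings`, so `lm_head` gets its own copy of the embedding weights after loading. A rough sketch of that step; untying `config.tie_word_embeddings` before instantiation is an assumption about code not shown in this diff:

```python
# Rough sketch of the tie_word_embeddings handling visible in the context lines.
# Only the clone after loading appears verbatim in the diff; the rest is assumed.
from typing import Optional

import transformers


def load_untied(input_model: str, access_token: Optional[str] = None):
    config = transformers.AutoConfig.from_pretrained(input_model, token=access_token)
    process_word_embeddings = False
    if config.tie_word_embeddings:
        config.tie_word_embeddings = False  # assumed: untie before building the model
        process_word_embeddings = True
    model = transformers.AutoModelForCausalLM.from_pretrained(
        input_model, config=config, token=access_token
    )
    if process_word_embeddings:
        # Give lm_head an independent copy of the embedding weights, since
        # SpinQuant cannot work with tied embeddings (per the comment above).
        model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()
    return model
```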

scripts/32_eval_ptq_executorch.sh

+1 −1

@@ -21,5 +21,5 @@ torchrun --nnodes=1 --nproc_per_node=1 ptq.py \
     --a_asym \
     --rotate \
     --optimized_rotation_path "your_path/R.bin" \
-    --save_qmodel_path "./your_output_model_path/executorch_model.bin" \
+    --save_qmodel_path "./your_output_model_path/consolidated.00.pth" \
     --export_to_et

utils/process_args.py

+4 −0

@@ -26,6 +26,10 @@ class ModelArguments:
     optimized_rotation_path: Optional[str] = field(
         default=None, metadata={"help": "Optimized rotation checkpoint path"}
     )
+    access_token: Optional[str] = field(
+        default=None,
+        metadata={"help": "Huggingface access token to access gated repo like Llama"},
+    )
 
 
 @dataclass
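
The new field follows the same `dataclasses.field` pattern as the existing arguments. Assuming `ModelArguments` is parsed with `transformers.HfArgumentParser` (the parsing code is not shown in this diff), the field would surface on the command line as `--access_token`; a self-contained sketch with a trimmed-down dataclass:

```python
# Sketch only: a trimmed ModelArguments plus the new field, parsed the way
# transformers.HfArgumentParser would expose it. Whether the repo actually uses
# HfArgumentParser, and the input_model help text, are assumptions.
from dataclasses import dataclass, field
from typing import Optional

import transformers


@dataclass
class ModelArguments:
    input_model: Optional[str] = field(
        default=None, metadata={"help": "Model path or HF repo id"}
    )
    access_token: Optional[str] = field(
        default=None,
        metadata={"help": "Huggingface access token to access gated repo like Llama"},
    )


parser = transformers.HfArgumentParser(ModelArguments)
(model_args,) = parser.parse_args_into_dataclasses(
    ["--input_model", "meta-llama/Llama-2-7b-hf", "--access_token", "hf_xxx"]
)
print(model_args.access_token)  # -> hf_xxx
```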
