diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py
index d02213b9faf..515bc97cca3 100755
--- a/.ci/scripts/gather_test_models.py
+++ b/.ci/scripts/gather_test_models.py
@@ -90,7 +90,7 @@ def model_should_run_on_event(model: str, event: str) -> bool:
     We put higher priority and fast models to pull request and rest to push.
     """
     if event == "pull_request":
-        return model in ["mv3", "vit"]
+        return model in ["mv3", "vit", "qwen2_5"]  # TODO: remove, just to test the CI
     elif event == "push":
         # These are super slow. Only run it periodically
         return model not in ["dl3", "edsr", "emformer_predict"]
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index 157449c0717..054ac02bc07 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -91,7 +91,17 @@ test_model() {
     # Install requirements for llama vision.
     bash examples/models/llama3_2_vision/install_requirements.sh
   fi
-  # python3 -m examples.portable.scripts.export --model_name="llama2" should works too
+  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+    # Install requirements for export_llama.
+    bash examples/models/llama/install_requirements.sh
+    # Test the export_llama script: python3 -m examples.models.llama.export_llama.
+    # Use a Llama random checkpoint with the Qwen 2.5 1.5B model configuration.
+    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/qwen2_5/1_5b_config.json
+    rm "./${MODEL_NAME}.pte"
+    return  # Skip running with the portable executor runner since portable doesn't support Qwen's biased linears.
+  fi
+
+  # Export a basic .pte and run the model.
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
   run_portable_executor_runner
 }
diff --git a/examples/models/__init__.py b/examples/models/__init__.py
index 822d55fc09d..55f5c449ca2 100644
--- a/examples/models/__init__.py
+++ b/examples/models/__init__.py
@@ -34,6 +34,7 @@
     "resnet50": ("resnet", "ResNet50Model"),
     "llava": ("llava", "LlavaModel"),
     "efficient_sam": ("efficient_sam", "EfficientSAM"),
+    "qwen2_5": ("qwen2_5", "Qwen2_5Model"),
 }
 
 __all__ = [
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index 4ad92903534..6d9ba750431 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -84,6 +84,7 @@
 
 verbosity_setting = None
 
+# All models that leverage the transformer architecture defined in llama_transformer.py.
 EXECUTORCH_DEFINED_MODELS = [
     "stories110m",
     "llama2",
@@ -91,6 +92,7 @@
     "llama3_1",
     "llama3_2",
     "static_llama",
+    "qwen2_5",
 ]
 
 TORCHTUNE_DEFINED_MODELS = ["llama3_2_vision"]
diff --git a/examples/models/llama/model.py b/examples/models/llama/model.py
index 90582af4856..bc4fd6ccb11 100644
--- a/examples/models/llama/model.py
+++ b/examples/models/llama/model.py
@@ -236,14 +236,23 @@ def __init__(self, **kwargs):
                 eviction_batch_size=eviction_batch_size,
             )
 
-        # assign=True: load params/buffers by assignment instead of performing an in-place copy.
-        # Because we are using device="meta", tensors do not have memory associated with them
-        # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
-        missing, unexpected = self.model_.load_state_dict(
-            checkpoint,
-            strict=False,
-            assign=True,
-        )  # self.model_ = Transformer(gptconf)
+        missing, unexpected = None, None
+        try:
+            # assign=True: load params/buffers by assignment instead of performing an in-place copy.
+            # Because we are using device="meta", tensors do not have memory associated with them
+            # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
+            missing, unexpected = self.model_.load_state_dict(
+                checkpoint,
+                strict=False,
+                assign=True,
+            )  # self.model_ = Transformer(gptconf)
+        except RuntimeError as e:
+            print(
+                "Could not load checkpoint into model, defaulting to random uninitialized weights."
+            )
+            print(f"Error: {e}")
+            # Need to provide concrete (empty) values for meta-initialized tensors for quantization.
+            self.model_.to_empty(device="cpu")
 
         if missing:
             missing_weights = [fqn for fqn in missing if fqn.endswith(".weight")]
diff --git a/examples/models/qwen2_5/__init__.py b/examples/models/qwen2_5/__init__.py
new file mode 100644
index 00000000000..d86a97a114d
--- /dev/null
+++ b/examples/models/qwen2_5/__init__.py
@@ -0,0 +1,14 @@
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.examples.models.llama.model import Llama2Model
+
+
+class Qwen2_5Model(Llama2Model):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+
+__all__ = [
+    "Qwen2_5Model",
+]
diff --git a/examples/models/qwen2_5/convert_weights.py b/examples/models/qwen2_5/convert_weights.py
index 6b6c0bbdfe2..9aada5b3e90 100644
--- a/examples/models/qwen2_5/convert_weights.py
+++ b/examples/models/qwen2_5/convert_weights.py
@@ -68,7 +68,6 @@ def main():
 
     # Don't necessarily need to use TorchTune checkpointer, can just aggregate checkpoint files by ourselves.
     checkpointer = FullModelHFCheckpointer(
-        # checkpoint_dir="/home/jackzhxng/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B/snapshots/8faed761d45a263340a0528343f099c05c9a4323/",
         checkpoint_dir=args.input_dir,
         checkpoint_files=["model.safetensors"],
         output_dir=".",
@@ -80,7 +79,6 @@ def main():
 
     print("Converting checkpoint...")
     sd = qwen_2_tune_to_meta(sd["model"])
-    # torch.save(sd, "/home/jackzhxng/models/qwen2_5-1_5b.pth")
     torch.save(sd, args.output)
 
     print(f"Checkpoint saved to {args.output}")