Merge pull request #27 from dtnewman/main

bugfix in llm setup
risingsunomi · Oct 15, 2024 · fa24f46
2 parents d5a02be + e7470b1
commit fa24f46
Showing 1 changed file with 6 additions and 5 deletions.
diff --git a/exo/inference/torch/model/hf.py b/exo/inference/torch/model/hf.py
@@ -92,17 +92,18 @@ def __init__(
         # this is needed because shard downloader just
         # appends and not redownloads the file
         os.remove(self.model_safetensors_path)
+
+        self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device)
+        self.model = self.llm_model.model.to(self.device)
       else:
-        self.llm_model_config = AutoConfig.from_pretrained(
+        self.llm_model = AutoModelForCausalLM.from_pretrained(
           pretrained_model_name_or_path=self.local_model_path,
           torch_dtype=self.dtype,
           device_map=self.device_map,
           offload_buffers=self.offload_buffers
         )
-
-      self.llm_model = AutoModelForCausalLM.from_config(self.llm_model_config).to(self.device)
-
-      self.model = self.llm_model.model.to(self.device)
+        self.model = self.llm_model.model
+
     except Exception as err:
       print(f"error loading and splitting model: {err}")
       raise