140 changes: 0 additions & 140 deletions evaluate.py

This file was deleted.

4 changes: 4 additions & 0 deletions evaluate_adapter.py → evaluate/adapter.py
@@ -10,6 +10,10 @@
import torch
import tqdm

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import Tokenizer
from lit_llama.adapter import LLaMA
from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup
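Every relocated script (and `generate.py`) gains the same four-line preamble so that `from lit_llama import ...` still resolves when the file is run directly from a source checkout rather than from an installed package. A minimal, self-contained sketch of the pattern, assuming the script sits one directory below the repository root, as `evaluate/adapter.py` now does:

```python
# Sketch of the path bootstrap added by this PR (not part of the diff itself).
# Assumes this file lives one level below the repo root, e.g. evaluate/adapter.py,
# so parent.parent resolves to the checkout that contains the lit_llama/ package.
import sys
from pathlib import Path

wd = Path(__file__).parent.parent.resolve()  # repository root
sys.path.append(str(wd))  # lets `import lit_llama` work without `pip install -e .`

from lit_llama import Tokenizer  # resolved against the checkout, not site-packages
```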
4 changes: 4 additions & 0 deletions evaluate_full.py → evaluate/full.py
@@ -10,6 +10,10 @@
import torch
import tqdm

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import LLaMA, Tokenizer
from lit_llama.utils import EmptyInitOnDevice

4 changes: 4 additions & 0 deletions evaluate_lora.py → evaluate/lora.py
@@ -10,6 +10,10 @@
import torch
import tqdm

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import LLaMA, Tokenizer
from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup
from lit_llama.lora import lora
5 changes: 5 additions & 0 deletions finetune_adapter.py → finetune/adapter.py
@@ -12,6 +12,7 @@
`torch.backends.cuda.enable_flash_sdp(False)` in the script below (see https://github.com/Lightning-AI/lit-llama/issues/101).
"""
import os
import sys
import time
from pathlib import Path
import shutil
@@ -20,6 +21,10 @@
import numpy as np
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama.adapter import LLaMA, LLaMAConfig, mark_only_adapter_as_trainable, adapter_state_from_state_dict
from lit_llama.tokenizer import Tokenizer
6 changes: 6 additions & 0 deletions finetune_full.py → finetune/full.py
@@ -4,6 +4,8 @@
Note: If you run into a CUDA error "Expected is_sm80 to be true, but got false", uncomment the line
`torch.backends.cuda.enable_flash_sdp(False)` in the script below (see https://github.com/Lightning-AI/lit-llama/issues/101).
"""
import sys
from pathlib import Path
import os
import time
from functools import partial
@@ -14,6 +16,10 @@
import torch
from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama.model import Block, LLaMA, LLaMAConfig
from lit_llama.tokenizer import Tokenizer
6 changes: 6 additions & 0 deletions finetune_lora.py → finetune/lora.py
@@ -4,13 +4,19 @@
Note: If you run into a CUDA error "Expected is_sm80 to be true, but got false", uncomment the line
`torch.backends.cuda.enable_flash_sdp(False)` in the script below (see https://github.com/Lightning-AI/lit-llama/issues/101).
"""
import sys
from pathlib import Path
import os
import time

import lightning as L
import numpy as np
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama.lora import mark_only_lora_as_trainable, lora, lora_state_dict
from lit_llama.model import LLaMA, LLaMAConfig
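The LoRA-specific imports that `finetune/lora.py` pulls in above are used together in one place in that script. A rough sketch of the pattern, assuming `lora()` and `mark_only_lora_as_trainable()` keep the signatures they have in `lit_llama/lora.py` (adapters are injected while the model is constructed, then everything except the adapter weights is frozen); the hyperparameter values are illustrative, not the script's defaults:

```python
# Sketch only; call signatures are assumed to match lit_llama/lora.py,
# and r/alpha/dropout are placeholder values.
from lit_llama.lora import lora, mark_only_lora_as_trainable
from lit_llama.model import LLaMA, LLaMAConfig

with lora(r=8, alpha=16, dropout=0.05, enabled=True):
    # while the context manager is active, attention projections are built
    # with low-rank adapter weights attached
    model = LLaMA(LLaMAConfig.from_name("7B"))

mark_only_lora_as_trainable(model)  # freeze every non-LoRA parameter
trainable_params = [p for p in model.parameters() if p.requires_grad]
```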
4 changes: 4 additions & 0 deletions generate.py
@@ -7,6 +7,10 @@
import lightning as L
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import LLaMA, Tokenizer
from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup

7 changes: 5 additions & 2 deletions generate_adapter.py → generate/adapter.py
@@ -7,6 +7,10 @@
import lightning as L
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama import Tokenizer
from lit_llama.adapter import LLaMA
@@ -53,8 +57,7 @@ def main(

print("Loading model ...", file=sys.stderr)
t0 = time.time()
with (lazy_load(pretrained_path) as pretrained_checkpoint,
lazy_load(adapter_path) as adapter_checkpoint):
with lazy_load(pretrained_path) as pretrained_checkpoint, lazy_load(adapter_path) as adapter_checkpoint:
name = llama_model_lookup(pretrained_checkpoint)

with EmptyInitOnDevice(
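Besides the path bootstrap, this hunk flattens a parenthesized multi-item `with` into the single-line form. A plausible reason (not stated in the diff) is syntax compatibility: parenthesized context managers are only guaranteed from Python 3.10, while the one-line form parses on every supported version. A self-contained sketch of the difference, with a hypothetical `open_resource` standing in for `lit_llama.utils.lazy_load`:

```python
from contextlib import contextmanager


@contextmanager
def open_resource(name):
    # stand-in for lazy_load, which yields a checkpoint mapping
    yield {"name": name}


# Removed form -- only guaranteed to parse on Python 3.10+:
# with (open_resource("pretrained") as pretrained_checkpoint,
#       open_resource("adapter") as adapter_checkpoint):
#     ...

# Form used after this change -- valid on older Pythons as well:
with open_resource("pretrained") as pretrained_checkpoint, open_resource("adapter") as adapter_checkpoint:
    print(pretrained_checkpoint["name"], adapter_checkpoint["name"])
```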
File renamed without changes.
4 changes: 4 additions & 0 deletions generate_lora.py → generate/lora.py
@@ -7,6 +7,10 @@
import lightning as L
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama import Tokenizer, LLaMA
from lit_llama.lora import lora
4 changes: 2 additions & 2 deletions howto/customize_paths.md
@@ -17,15 +17,15 @@ python scripts/convert_checkpoint.py --checkpoint_dir "data/checkpoints/foo"
Note that this change will need to be passed along to subsequent steps, for example:

```shell
python scripts/generate.py \
python generate.py \
--checkpoint_path "data/checkpoints/foo/7B/lit-llama.pth" \
--tokenizer_path "data/checkpoints/foo/tokenizer.model"
```

and

```shell
python scripts/quantize.py \
python quantize/gptq.py \
--checkpoint_path "data/checkpoints/foo/7B/lit-llama.pth" \
--tokenizer_path "data/checkpoints/foo/tokenizer.model"
```
9 changes: 5 additions & 4 deletions howto/finetune_adapter.md
@@ -24,14 +24,15 @@ The steps here only need to be done once:
## Running the finetuning

```bash
python finetune_adapter.py
python finetune/adapter.py
```

The finetuning requires at least one GPU with ~24 GB memory (RTX 3090).
You can speed up training by setting the `devices` variable in the script to utilize more GPUs if available.
Depending on the available GPU memory, you can also tune the `micro_batch_size` parameter to utilize the GPU efficiently.

For example, the following settings will let you finetune the model in under 1 hour using DeepSpeed Zero-2:

```python
devices = 8
micro_batch_size = 8
@@ -47,7 +48,7 @@ This script will save checkpoints periodically to the folder `out/`.
You can test the finetuned model with your own instructions by running:

```bash
python generate_adapter.py \
python generate/adapter.py \
--prompt "Recommend a movie to watch on the weekend." \
--quantize llm.int8
```
@@ -89,10 +90,10 @@ With only a few modifications, you can prepare and train on your own instruction
python scripts/prepare_mydata.py --destination_path data/mydata/
```

5. Run `finetune_adapter.py` by passing in the location of your data (and optionally other parameters):
5. Run `finetune/adapter.py` by passing in the location of your data (and optionally other parameters):

```bash
python finetune_adapter.py --data_dir data/mydata/ --out_dir out/myexperiment
python finetune/adapter.py --data_dir data/mydata/ --out_dir out/myexperiment
```


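The how-to above runs a `scripts/prepare_mydata.py` that the reader writes themselves; the name is a placeholder from the guide, not a file in this PR. A rough sketch of what such a script could look like, assuming Alpaca-style records (`instruction` / `input` / `output`) and that `lit_llama.tokenizer.Tokenizer.encode` accepts an `eos` flag as it does for the Alpaca preparation script; the prompt template, split logic, and paths are all up to the user:

```python
# Hypothetical scripts/prepare_mydata.py -- a sketch, not part of this PR.
import json
from pathlib import Path

import torch
from lit_llama.tokenizer import Tokenizer


def prepare(source: Path, destination_path: Path, tokenizer_path: Path, test_fraction: float = 0.1) -> None:
    destination_path.mkdir(parents=True, exist_ok=True)
    tokenizer = Tokenizer(tokenizer_path)
    records = json.loads(source.read_text())  # list of {"instruction", "input", "output"} dicts

    samples = []
    for r in records:
        prompt = f"{r['instruction']}\n{r['input']}\n"          # simplistic prompt template
        encoded = tokenizer.encode(prompt + r["output"], eos=True)
        samples.append({"input_ids": encoded, "labels": encoded.clone()})

    n_test = int(len(samples) * test_fraction)
    torch.save(samples[n_test:], destination_path / "train.pt")
    torch.save(samples[:n_test], destination_path / "test.pt")


if __name__ == "__main__":
    # illustrative paths only
    prepare(Path("data/mydata/mydata.json"), Path("data/mydata/"), Path("checkpoints/lit-llama/tokenizer.model"))
```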
10 changes: 5 additions & 5 deletions howto/finetune_full.md
@@ -2,7 +2,7 @@

Full finetuning updates all layers in the pretrained LLaMA model. This *regular* finetuning procedure is typically considered the baseline for parameter-efficient alternatives such as Low-Rank Adaptation (LoRA) or LLaMA-Adapter.

The current [finetune_full.py](../scripts/finetune_full.py) we provide uses 4 A100 GPUs with a fully-sharded data parallel strategy to finetune Lit-LLaMA 7B on [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. The A100 GPUs have 40 GB each, but it may require less memory to finetune this model.
The current [finetune/full.py](../finetune/full.py) we provide uses 4 A100 GPUs with a fully-sharded data parallel strategy to finetune Lit-LLaMA 7B on [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. The A100 GPUs have 40 GB each, but it may require less memory to finetune this model.



@@ -25,7 +25,7 @@ The steps here only need to be done once:
## Running the finetuning

```bash
python finetune_full.py
python finetune/full.py
```


@@ -49,7 +49,7 @@ This script will save checkpoints periodically to the folder `out/`.
You can test the finetuned model with your own instructions by running:

```bash
python generate_full.py \
python generate/full.py \
--prompt "Recommend a movie to watch on the weekend." \
--quantize llm.int8
```
@@ -91,10 +91,10 @@ With only a few modifications, you can prepare and train on your own instruction
python scripts/prepare_mydata.py --destination_path data/mydata/
```

5. Run `finetune_full.py` by passing in the location of your data (and optionally other parameters):
5. Run `finetune/full.py` by passing in the location of your data (and optionally other parameters):

```bash
python finetune_full.py --data_dir data/mydata/ --out_dir out/myexperiment
python finetune/full.py --data_dir data/mydata/ --out_dir out/myexperiment
```

