140 changes: 0 additions & 140 deletions evaluate.py

This file was deleted.

4 changes: 4 additions & 0 deletions evaluate_adapter.py → evaluate/adapter.py
@@ -10,6 +10,10 @@
import torch
import tqdm

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import Tokenizer
from lit_llama.adapter import LLaMA
from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup
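Every relocated script (and `generate.py`) gains the same four-line preamble so that `from lit_llama import ...` still resolves when the file is run directly from a source checkout rather than from an installed package. A minimal, self-contained sketch of the pattern, assuming the script sits one directory below the repository root, as `evaluate/adapter.py` now does:

```python
# Sketch of the path bootstrap added by this PR (not part of the diff itself).
# Assumes this file lives one level below the repo root, e.g. evaluate/adapter.py,
# so parent.parent resolves to the checkout that contains the lit_llama/ package.
import sys
from pathlib import Path

wd = Path(__file__).parent.parent.resolve()  # repository root
sys.path.append(str(wd))  # lets `import lit_llama` work without `pip install -e .`

from lit_llama import Tokenizer  # resolved against the checkout, not site-packages
```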
4 changes: 4 additions & 0 deletions evaluate_full.py → evaluate/full.py
@@ -10,6 +10,10 @@
import torch
import tqdm

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import LLaMA, Tokenizer
from lit_llama.utils import EmptyInitOnDevice

4 changes: 4 additions & 0 deletions evaluate_lora.py → evaluate/lora.py
@@ -10,6 +10,10 @@
import torch
import tqdm

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import LLaMA, Tokenizer
from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup
from lit_llama.lora import lora
5 changes: 5 additions & 0 deletions finetune_adapter.py → finetune/adapter.py
@@ -12,6 +12,7 @@
`torch.backends.cuda.enable_flash_sdp(False)` in the script below (see https://github.com/Lightning-AI/lit-llama/issues/101).
"""
import os
import sys
import time
from pathlib import Path
import shutil
@@ -20,6 +21,10 @@
import numpy as np
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama.adapter import LLaMA, LLaMAConfig, mark_only_adapter_as_trainable, adapter_state_from_state_dict
from lit_llama.tokenizer import Tokenizer
6 changes: 6 additions & 0 deletions finetune_full.py → finetune/full.py
@@ -4,6 +4,8 @@
Note: If you run into a CUDA error "Expected is_sm80 to be true, but got false", uncomment the line
`torch.backends.cuda.enable_flash_sdp(False)` in the script below (see https://github.com/Lightning-AI/lit-llama/issues/101).
"""
import sys
from pathlib import Path
import os
import time
from functools import partial
@@ -14,6 +16,10 @@
import torch
from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama.model import Block, LLaMA, LLaMAConfig
from lit_llama.tokenizer import Tokenizer
6 changes: 6 additions & 0 deletions finetune_lora.py → finetune/lora.py
@@ -4,13 +4,19 @@
Note: If you run into a CUDA error "Expected is_sm80 to be true, but got false", uncomment the line
`torch.backends.cuda.enable_flash_sdp(False)` in the script below (see https://github.com/Lightning-AI/lit-llama/issues/101).
"""
import sys
from pathlib import Path
import os
import time

import lightning as L
import numpy as np
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama.lora import mark_only_lora_as_trainable, lora, lora_state_dict
from lit_llama.model import LLaMA, LLaMAConfig
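The LoRA-specific imports that `finetune/lora.py` pulls in above are used together in one place in that script. A rough sketch of the pattern, assuming `lora()` and `mark_only_lora_as_trainable()` keep the signatures they have in `lit_llama/lora.py` (adapters are injected while the model is constructed, then everything except the adapter weights is frozen); the hyperparameter values are illustrative, not the script's defaults:

```python
# Sketch only; call signatures are assumed to match lit_llama/lora.py,
# and r/alpha/dropout are placeholder values.
from lit_llama.lora import lora, mark_only_lora_as_trainable
from lit_llama.model import LLaMA, LLaMAConfig

with lora(r=8, alpha=16, dropout=0.05, enabled=True):
    # while the context manager is active, attention projections are built
    # with low-rank adapter weights attached
    model = LLaMA(LLaMAConfig.from_name("7B"))

mark_only_lora_as_trainable(model)  # freeze every non-LoRA parameter
trainable_params = [p for p in model.parameters() if p.requires_grad]
```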
4 changes: 4 additions & 0 deletions generate.py
@@ -7,6 +7,10 @@
import lightning as L
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from lit_llama import LLaMA, Tokenizer
from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup

7 changes: 5 additions & 2 deletions generate_adapter.py → generate/adapter.py
@@ -7,6 +7,10 @@
import lightning as L
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama import Tokenizer
from lit_llama.adapter import LLaMA
@@ -53,8 +57,7 @@ def main(

print("Loading model ...", file=sys.stderr)
t0 = time.time()
with (lazy_load(pretrained_path) as pretrained_checkpoint,
lazy_load(adapter_path) as adapter_checkpoint):
with lazy_load(pretrained_path) as pretrained_checkpoint, lazy_load(adapter_path) as adapter_checkpoint:
name = llama_model_lookup(pretrained_checkpoint)

with EmptyInitOnDevice(
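Besides the path bootstrap, this hunk flattens a parenthesized multi-item `with` into the single-line form. A plausible reason (not stated in the diff) is syntax compatibility: parenthesized context managers are only guaranteed from Python 3.10, while the one-line form parses on every supported version. A self-contained sketch of the difference, with a hypothetical `open_resource` standing in for `lit_llama.utils.lazy_load`:

```python
from contextlib import contextmanager


@contextmanager
def open_resource(name):
    # stand-in for lazy_load, which yields a checkpoint mapping
    yield {"name": name}


# Removed form -- only guaranteed to parse on Python 3.10+:
# with (open_resource("pretrained") as pretrained_checkpoint,
#       open_resource("adapter") as adapter_checkpoint):
#     ...

# Form used after this change -- valid on older Pythons as well:
with open_resource("pretrained") as pretrained_checkpoint, open_resource("adapter") as adapter_checkpoint:
    print(pretrained_checkpoint["name"], adapter_checkpoint["name"])
```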
File renamed without changes.
4 changes: 4 additions & 0 deletions generate_lora.py → generate/lora.py
@@ -7,6 +7,10 @@
import lightning as L
import torch

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate import generate
from lit_llama import Tokenizer, LLaMA
from lit_llama.lora import lora
4 changes: 2 additions & 2 deletions howto/customize_paths.md
@@ -17,15 +17,15 @@ python scripts/convert_checkpoint.py --checkpoint_dir "data/checkpoints/foo"
Note that this change will need to be passed along to subsequent steps, for example:

```shell
python scripts/generate.py \
python generate.py \
--checkpoint_path "data/checkpoints/foo/7B/lit-llama.pth" \
--tokenizer_path "data/checkpoints/foo/tokenizer.model"
```

and

```shell
python scripts/quantize.py \
python quantize/gptq.py \
--checkpoint_path "data/checkpoints/foo/7B/lit-llama.pth" \
--tokenizer_path "data/checkpoints/foo/tokenizer.model"
```
9 changes: 5 additions & 4 deletions howto/finetune_adapter.md
@@ -24,14 +24,15 @@ The steps here only need to be done once:
## Running the finetuning

```bash
python finetune_adapter.py
python finetune/adapter.py
```

The finetuning requires at least one GPU with ~24 GB memory (RTX 3090).
You can speed up training by setting the `devices` variable in the script to utilize more GPUs if available.
Depending on the available GPU memory, you can also tune the `micro_batch_size` parameter to utilize the GPU efficiently.

For example, the following settings will let you finetune the model in under 1 hour using DeepSpeed Zero-2:

```python
devices = 8
micro_batch_size = 8
@@ -47,7 +48,7 @@ This script will save checkpoints periodically to the folder `out/`.
You can test the finetuned model with your own instructions by running:

```bash
python generate_adapter.py \
python generate/adapter.py \
--prompt "Recommend a movie to watch on the weekend." \
--quantize llm.int8
```
@@ -89,10 +90,10 @@ With only a few modifications, you can prepare and train on your own instruction
python scripts/prepare_mydata.py --destination_path data/mydata/
```

5. Run `finetune_adapter.py` by passing in the location of your data (and optionally other parameters):
5. Run `finetune/adapter.py` by passing in the location of your data (and optionally other parameters):

```bash
python finetune_adapter.py --data_dir data/mydata/ --out_dir out/myexperiment
python finetune/adapter.py --data_dir data/mydata/ --out_dir out/myexperiment
```


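The how-to above runs a `scripts/prepare_mydata.py` that the reader writes themselves; the name is a placeholder from the guide, not a file in this PR. A rough sketch of what such a script could look like, assuming Alpaca-style records (`instruction` / `input` / `output`) and that `lit_llama.tokenizer.Tokenizer.encode` accepts an `eos` flag as it does for the Alpaca preparation script; the prompt template, split logic, and paths are all up to the user:

```python
# Hypothetical scripts/prepare_mydata.py -- a sketch, not part of this PR.
import json
from pathlib import Path

import torch
from lit_llama.tokenizer import Tokenizer


def prepare(source: Path, destination_path: Path, tokenizer_path: Path, test_fraction: float = 0.1) -> None:
    destination_path.mkdir(parents=True, exist_ok=True)
    tokenizer = Tokenizer(tokenizer_path)
    records = json.loads(source.read_text())  # list of {"instruction", "input", "output"} dicts

    samples = []
    for r in records:
        prompt = f"{r['instruction']}\n{r['input']}\n"          # simplistic prompt template
        encoded = tokenizer.encode(prompt + r["output"], eos=True)
        samples.append({"input_ids": encoded, "labels": encoded.clone()})

    n_test = int(len(samples) * test_fraction)
    torch.save(samples[n_test:], destination_path / "train.pt")
    torch.save(samples[:n_test], destination_path / "test.pt")


if __name__ == "__main__":
    # illustrative paths only
    prepare(Path("data/mydata/mydata.json"), Path("data/mydata/"), Path("checkpoints/lit-llama/tokenizer.model"))
```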
10 changes: 5 additions & 5 deletions howto/finetune_full.md
@@ -2,7 +2,7 @@

Full finetuning updates all layers in the pretrained LLaMA model. This *regular* finetuning procedure is typically considered the baseline for parameter-efficient alternatives such as Low-Rank Adaptation (LoRA) or LLaMA-Adapter.

The current [finetune_full.py](../scripts/finetune_full.py) we provide uses 4 A100 GPUs with a fully-sharded data parallel strategy to finetune Lit-LLaMA 7B on [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. The A100 GPUs have 40 GB each, but it may require less memory to finetune this model.
The current [finetune/full.py](../finetune/full.py) we provide uses 4 A100 GPUs with a fully-sharded data parallel strategy to finetune Lit-LLaMA 7B on [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. The A100 GPUs have 40 GB each, but it may require less memory to finetune this model.



@@ -25,7 +25,7 @@ The steps here only need to be done once:
## Running the finetuning

```bash
python finetune_full.py
python finetune/full.py
```


@@ -49,7 +49,7 @@ This script will save checkpoints periodically to the folder `out/`.
You can test the finetuned model with your own instructions by running:

```bash
python generate_full.py \
python generate/full.py \
--prompt "Recommend a movie to watch on the weekend." \
--quantize llm.int8
```
@@ -91,10 +91,10 @@ With only a few modifications, you can prepare and train on your own instruction
python scripts/prepare_mydata.py --destination_path data/mydata/
```

5. Run `finetune_full.py` by passing in the location of your data (and optionally other parameters):
5. Run `finetune/full.py` by passing in the location of your data (and optionally other parameters):

```bash
python finetune_full.py --data_dir data/mydata/ --out_dir out/myexperiment
python finetune/full.py --data_dir data/mydata/ --out_dir out/myexperiment
```

