
Commit 99eeaaf

Merge branch 'main' into moe2

Quentin-Anthony authored Mar 4, 2024
2 parents 2d38fca + 19596b0
Showing 141 changed files with 2,901 additions and 2,064 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/cpu_ci.yml
@@ -4,7 +4,8 @@ on: "push"

jobs:
run-tests:
runs-on: ubuntu-latest
#runs-on: ubuntu-latest
runs-on: [ 'test', 'self-hosted' ]
steps:
- uses: actions/checkout@v3

6 changes: 3 additions & 3 deletions .github/workflows/pull_request.yml
@@ -4,12 +4,12 @@ on: [pull_request]

jobs:
pre-commit:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v4
with:
python-version: 3.8
python-version: 3.10
cache: "pip"
cache-dependency-path: "**/requirements*.txt"
# Need the right version of clang-format
@@ -24,7 +24,7 @@ jobs:
uses: docker/build-push-action@v2

update-documentation:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
with:
5 changes: 5 additions & 0 deletions .gitignore
@@ -137,6 +137,7 @@ data/**/*.bin
data/**/*.json*
data/**/*.txt
data/**/*.gz
data/**/*.zip
data/**/*.np*
data/**/*.npy
checkpoints/
@@ -150,3 +151,7 @@ test_logs/
logs/
tensorboard/
src/

# test data files
tests/data/*.bin
tests/data/*.idx
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -16,10 +16,10 @@ repos:
args: [--fix=lf]
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://gitlab.com/daverona/pre-commit-cpp
- repo: https://gitlab.com/daverona/pre-commit/cpp
rev: 0.8.0
hooks:
- id: clang-format # formatter of C/C++ code based on a style guide: LLVM, Google, Chromium, Mozilla, and WebKit available
args: []

- repo: https://github.com/psf/black
86 changes: 86 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,86 @@
# Contributing
GPT-NeoX welcomes your contributions!

## Prerequisites
GPT-NeoX uses [pre-commit](https://pre-commit.com/) to ensure that formatting is
consistent across GPT-NeoX. First, ensure that `pre-commit` is installed with
`pip install pre-commit`. Next, the pre-commit hooks must be installed once
before commits can be made:
```bash
pre-commit install
```
Please install `clang-format` from Conda:
```bash
conda install clang-format
```

Afterwards, our suite of formatting tests runs automatically before each `git commit`. You
can also run them manually:
```bash
pre-commit run --all-files
```
If a formatting test fails, it will fix the modified code in place and abort
the `git commit`. After looking over the changes, you can `git add <modified files>`
and then repeat the previous `git commit` command.
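
For example, a typical fix-and-retry sequence looks like this (the file name is illustrative):
```bash
git commit -m "add feature"    # a hook reformats files in place and aborts the commit
git diff                       # review the formatting fixes
git add megatron/model.py      # re-stage the files the hooks modified
git commit -m "add feature"    # this time the checks pass
```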


## Testing
GPT-NeoX tracks two types of tests: unit tests and more costly model convergence tests.
Unit tests are found in `tests/unit/` and the model convergence tests are found in
`tests/model/`.

### Unit Tests
[PyTest](https://docs.pytest.org/en/latest/) is used to execute tests. PyTest can be
installed from PyPI via `pip install pytest`. Simply invoke `pytest --forked` to run the
unit tests:
```bash
pytest --forked tests/unit/
```
You can also provide the `-v` flag to `pytest` to see additional information about the
tests. Note that [pytest-forked](https://github.com/pytest-dev/pytest-forked) and the
`--forked` flag are required to test CUDA functionality in distributed tests.
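
For example, to run a verbose subset of the unit tests (the `-k` filter expression is illustrative):
```bash
pytest --forked -v -k "config" tests/unit/
```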

### Model Tests
To execute model tests, first install GPT-NeoX. Next, execute the model test driver:
```bash
cd tests/model/
pytest run_sanity_check.py
```
Note that the `--forked` flag is not necessary for the model tests.

## Contributor License Agreement
This project welcomes contributions and suggestions. Most contributions require you to
agree to a Contributor License Agreement (CLA) declaring that you have the right to, and
actually do, grant us the rights to use your contribution. For details, visit
https://cla-assistant.io/EleutherAI/gpt-neox.

When you submit a pull request, a CLA bot will automatically determine whether you need
to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply
follow the instructions provided by the bot. You will only need to do this once across
all repos using our CLA.

## New Feature Contribution Guidelines
Unlike a bug fix or an improvement to an existing feature (where users usually submit a PR directly and we review it), adding a new feature to GPT-NeoX requires several steps: (1) proposal and discussion, (2) implementation and verification, and (3) release and maintenance. These guidelines apply to all new feature contributions; core GPT-NeoX team members may complete step 1 internally.

### Step 1: Proposal and Discussion
We ask users to first post their intended feature in an issue. This issue needs to include:

* A description of the proposed feature.
* A motivation for why it will be useful to GPT-NeoX users.
* A rough design of how you will implement the feature inside GPT-NeoX.
* (Important) Results or planned experiments to demonstrate the effectiveness and correctness of the feature.
  * If the feature only affects performance and does not affect training convergence, we require testing on a fraction of a training run to demonstrate that the training/validation loss is consistent with the baseline and that the performance is better than the baseline.
  * If the feature does affect training convergence, we require testing the whole training run to demonstrate that the feature achieves better or on-par final model quality and training performance compared to the baseline.

Based on the issue, we will discuss the merit of the new feature and decide whether to accept or decline the proposal. Once the proposal is accepted and we have confirmed the design and implementation plan, we are ready for step 2.

### Step 2: Implementation and Verification
The contributor will proceed to implement the feature, and the GPT-NeoX team will provide guidance and help as needed. The required deliverables include:

* A PR to [EleutherAI/GPT-NeoX](https://github.com/EleutherAI/gpt-neox) including (1) the feature implementation, (2) unit tests, (3) documentation, and (4) example usage.
* In the implementation (code, documentation, tutorial), we require the feature author to record their GitHub username as a contact method for future questions/maintenance.

After receiving the PRs, we will review them and merge them after necessary tests/fixes.

### Step 3: Release and Maintenance
After the PRs are merged, we will announce the feature on our website (with credit to the feature author). We ask the feature author to commit to the maintenance of the feature.
49 changes: 28 additions & 21 deletions Dockerfile
@@ -1,4 +1,4 @@
# Copyright (c) 2021, EleutherAI
# Copyright (c) 2024, EleutherAI
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM nvidia/cuda:11.7.1-devel-ubuntu20.04
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive

@@ -21,20 +21,20 @@ LABEL org.opencontainers.image.version = "2.0"
LABEL org.opencontainers.image.authors = "[email protected]"
LABEL org.opencontainers.image.source = "https://www.github.com/eleutherai/gpt-neox"
LABEL org.opencontainers.image.licenses = " Apache-2.0"
LABEL org.opencontainers.image.base.name="docker.io/nvidia/cuda:11.7.1-devel-ubuntu20.04"
LABEL org.opencontainers.image.base.name="docker.io/nvidia/cuda:12.1.1-devel-ubuntu22.04"

#### System package (uses default Python 3 version in Ubuntu 20.04)
RUN apt-get update -y && \
apt-get install -y \
git python3.9 python3-dev libpython3-dev python3-pip sudo pdsh \
htop llvm-9-dev tmux zstd software-properties-common build-essential autotools-dev \
git python3-dev libpython3-dev python3-pip sudo pdsh \
htop tmux zstd software-properties-common build-essential autotools-dev \
nfs-common pdsh cmake g++ gcc curl wget vim less unzip htop iftop iotop ca-certificates ssh \
rsync iputils-ping net-tools libcupti-dev libmlx4-1 infiniband-diags ibutils ibverbs-utils \
rdmacm-utils perftest rdma-core nano && \
update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \
pip install --upgrade pip && \
pip install gpustat
python -m pip install --upgrade pip && \
python -m pip install gpustat

### SSH
RUN mkdir /var/run/sshd && \
@@ -88,24 +88,31 @@ RUN mkdir -p /home/mchorse/.ssh /job && \
echo 'export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/mpi/lib:/usr/local/mpi/lib64:$LD_LIBRARY_PATH' >> /home/mchorse/.bashrc

#### Python packages
RUN pip install torch==1.13.0+cu117 torchvision==0.14.0+cu117 torchaudio==0.13.0 --extra-index-url https://download.pytorch.org/whl/cu117 && pip cache purge
COPY requirements/requirements.txt .
COPY requirements/requirements-wandb.txt .
COPY requirements/requirements-onebitadam.txt .
COPY requirements/requirements-sparseattention.txt .
COPY requirements/requirements-flashattention.txt .
RUN pip install -r requirements.txt && pip install -r requirements-onebitadam.txt
RUN pip install -r requirements-sparseattention.txt
RUN pip install -r requirements-flashattention.txt
RUN pip install -r requirements-wandb.txt
RUN pip install protobuf==3.20.*
RUN pip cache purge
RUN python -m pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
COPY requirements/* ./
RUN python -m pip install --no-cache-dir -r requirements.txt && pip install -r requirements-onebitadam.txt
RUN python -m pip install -r requirements-sparseattention.txt
RUN python -m pip install -r requirements-flashattention.txt
RUN python -m pip install -r requirements-wandb.txt
RUN python -m pip install protobuf==3.20.*
RUN python -m pip cache purge

## Install APEX
RUN pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" git+https://github.com/NVIDIA/apex.git@a651e2c24ecf97cbf367fd3f330df36760e1c597
# Detect the architecture and install Apex accordingly
RUN ARCH=$(uname -m) && \
if [ "$ARCH" = "x86_64" ]; then \
wget https://github.com/segyges/not-nvidia-apex/releases/download/jan-2024/apex-0.1-cp310-cp310-linux_x86_64.zip && \
unzip ./apex-0.1-cp310-cp310-linux_x86_64.zip && \
python -m pip install ./apex-0.1-cp310-cp310-linux_x86_64.whl; \
else \
# Install Apex directly from source for other architectures
python -m pip install -r requirements-apex-pip.txt && \
python -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings --global-option=--cpp_ext --config-settings --global-option=--cuda_ext git+https://github.com/NVIDIA/apex.git@141bbf1cf362d4ca4d94f4284393e91dda5105a5; \
fi

COPY megatron/fused_kernels/ megatron/fused_kernels
RUN python megatron/fused_kernels/setup.py install
WORKDIR /megatron/fused_kernels
RUN python setup.py install

# Clear staging
RUN mkdir -p /tmp && chmod 0777 /tmp
6 changes: 3 additions & 3 deletions LICENSE
@@ -1,5 +1,5 @@
Apache License
Version 2.0, January 2004
Version 2.0, January 2024
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
@@ -211,7 +211,7 @@ used in those files, as indicated.
------------- LICENSE FOR NVIDIA code --------------


# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -241,7 +241,7 @@ used in those files, as indicated.


Apache License
Version 2.0, January 2004
Version 2.0, January 2024
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
80 changes: 70 additions & 10 deletions README.md
@@ -67,6 +67,7 @@ Prior to 3/9/2023, GPT-NeoX relied on [DeeperSpeed](https://github.com/EleutherA
* [Weights and Biases](#weights-and-biases)
* [TensorBoard](#tensorboard)
- [Running on multi-node](#running-on-multi-node)
- [Profiling](#profiling)
- [Adoption and Publications](#adoption-and-publications)
* [Publications](#publications)
* [Models](#models)
@@ -76,6 +77,7 @@ Prior to 3/9/2023, GPT-NeoX relied on [DeeperSpeed](https://github.com/EleutherA
+ [Other Modalities](#other-modalities)
- [Administrative Notes](#administrative-notes)
* [Citing GPT-NeoX](#citing-gpt-neox)
* [Contributing](#contributing)
* [Licensing](#licensing)
* [Acknowledgements](#acknowledgements)

@@ -500,18 +502,21 @@ where `--eval_tasks` is a list of evaluation tasks followed by spaces, e.g `--ev

# Exporting to Hugging Face

GPT-NeoX is optimized heavily for training only, and GPT-NeoX model checkpoints are not compatible out of the box with other deep learning libraries. To make models easily loadable and shareable with end users, and for further exporting to various other frameworks, GPT-NeoX supports checkpoint conversion to the [Hugging Face Transformers](https://arxiv.org/abs/1910.03771) GPTNeoXModel format.
GPT-NeoX is optimized heavily for training only, and GPT-NeoX model checkpoints are not compatible out of the box with other deep learning libraries. To make models easily loadable and shareable with end users, and for further exporting to various other frameworks, GPT-NeoX supports checkpoint conversion to the [Hugging Face Transformers](https://arxiv.org/abs/1910.03771) format.

To convert a NeoX checkpoint (with pipeline-parallel-size>=1) to Hugging Face-loadable format, run:
```bash
python ./tools/ckpts/convert_module_to_hf.py --input_dir /path/to/model/global_stepXXX --config_file your_config.yml --output_dir hf_model/save/location
```
Though NeoX supports a number of different architectural configurations, including AliBi positional embeddings, not all of these configurations map cleanly onto the supported configurations within Hugging Face Transformers.

NeoX supports export of compatible models into the following architectures:
- GPTNeoXForCausalLM
- LlamaForCausalLM
- MistralForCausalLM

Training a model which does not fit into one of these Hugging Face Transformers architectures cleanly will require writing custom modeling code for the exported model.

To convert a sequential model to Hugging Face format, run:
To convert a GPT-NeoX library checkpoint to Hugging Face-loadable format, run:
```bash
python ./tools/ckpts/convert_sequential_to_hf.py --input_dir /path/to/model/global_stepXXX --config_file your_config.yml --output_dir hf_model/save/location
python ./tools/ckpts/convert_neox_to_hf.py --input_dir /path/to/model/global_stepXXX --config_file your_config.yml --output_dir hf_model/save/location --precision {auto,fp16,bf16,fp32} --architecture {neox,mistral,llama}
```
(Note: this script should be used for v2.0 checkpoints saved on a v2.0 commit prior to https://github.com/EleutherAI/gpt-neox/pull/866 and which used `pipe-parallel-size=1`. Using `pipe-parallel-size=0` will also save models in this format.)
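
For example, a concrete invocation that exports a Llama-architecture checkpoint in bf16 might look like this (all paths and the config name are illustrative):
```bash
python ./tools/ckpts/convert_neox_to_hf.py \
  --input_dir checkpoints/global_step100000 \
  --config_file configs/my_llama_config.yml \
  --output_dir hf_model/save/location \
  --precision bf16 \
  --architecture llama
```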

Then to upload a model to [the Hugging Face Hub](https://huggingface.co/), run:
```bash
@@ -520,7 +525,27 @@ python ./tools/ckpts/upload.py
```
and input the requested information, including HF hub user token.

Note, however, that this compatibility is not one-to-one, and only certain configurations from GPT-NeoX are supported in the Hugging Face GPTNeoXModel class. Advanced features such as alternative positional embeddings may require new Transformers modeling code and new conversion script tweaks.
### Importing Models Into GPT-NeoX

NeoX supplies several utilities for converting a pretrained model checkpoint into a format that can be trained within the library.

The following models or model families can be loaded in GPT-NeoX:
- Llama 1
- Llama 2
- CodeLlama
- Mistral-7b-v0.1

We provide two utilities for converting from two different checkpoint formats into a format compatible with GPT-NeoX.

To convert a Llama 1 or Llama 2 checkpoint distributed by Meta AI from its original file format (downloadable [here](https://github.com/facebookresearch/llama) or [here](https://huggingface.co/meta-llama/Llama-2-7b)) into the GPT-NeoX library, run

```
python tools/ckpts/convert_raw_llama_weights_to_neox.py --input_dir /path/to/model/parent/dir/7B --model_size 7B --output_dir /path/to/save/ckpt --num_output_shards <TENSOR_PARALLEL_SIZE> (--pipeline_parallel if pipeline-parallel-size >= 1)
```
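
For instance, converting a downloaded Llama 2 7B checkpoint for training with tensor parallel size 2 might look like this (paths are illustrative):
```bash
python tools/ckpts/convert_raw_llama_weights_to_neox.py \
  --input_dir ./llama-2/7B \
  --model_size 7B \
  --output_dir ./checkpoints/llama2-7b-neox \
  --num_output_shards 2
```
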
To convert from a Hugging Face model into a NeoX-loadable format, run `tools/ckpts/convert_hf_to_sequential.py`. See the documentation within that file for further options.
# Monitoring

@@ -538,6 +563,36 @@ We also support using TensorBoard via the <code><var>tensorboard-dir</var></code>
Expand All @@ -538,6 +563,36 @@ We also support using TensorBoard via the <code><var>tensorboard-dir</var></code
If you need to supply a hostfile for use with the MPI-based DeepSpeed launcher, you can set the environment variable `DLTS_HOSTFILE` to point to the hostfile.
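
For example, a minimal two-node hostfile setup might look like this (hostnames and slot counts are illustrative):
```bash
cat > /path/to/hostfile <<EOF
node1 slots=8
node2 slots=8
EOF
export DLTS_HOSTFILE=/path/to/hostfile
```
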
# Profiling
We support profiling with Nsight Systems and PyTorch Memory Profiling.
## Nsight Systems Profiling
To use the Nsight Systems profiling, set config options `profile`, `profile_step_start`, and `profile_step_stop`. Launch training with:
```
nsys profile -s none -t nvtx,cuda -o <path/to/profiling/output> --force-overwrite true \
--capture-range=cudaProfilerApi --capture-range-end=stop python $TRAIN_PATH/deepy.py \
$TRAIN_PATH/train.py --conf_dir configs <config files>
```
The generated output file can then be viewed with the Nsight Systems GUI:
![Alt text](images/nsight_profiling.png)
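
A minimal config sketch that enables the Nsight profiling window (the step values are placeholders):
```yaml
{
  "profile": true,
  "profile_step_start": 10,
  "profile_step_stop": 12,
}
```
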
## PyTorch Memory Profiling
To use PyTorch Memory Profiling, set config options `memory_profiling` and `memory_profiling_path`.
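
A minimal config sketch (the output path is a placeholder):
```yaml
{
  "memory_profiling": true,
  "memory_profiling_path": "/path/to/save/profile",
}
```
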
![Alt text](images/memory_profiling.png)
View the generated profile with the [memory_viz.py](https://github.com/pytorch/pytorch/blob/main/torch/cuda/_memory_viz.py) script. Run with:
```
python _memory_viz.py trace_plot <generated_profile> -o trace.html
```
# Adoption and Publications
The GPT-NeoX library has been widely adopted by academic and industry researchers and ported onto many HPC systems.
@@ -637,9 +692,14 @@ To cite the 20 billion parameter model named `GPT-NeoX-20B`, please use
}
```

## Contributing
GPT-NeoX is built by the open-source AI community, and relies on our amazing contributors! Please see our
[contributing](CONTRIBUTING.md) guide for more details on our CLA, code formatting, testing,
etc.

## Licensing

This repository hosts code that is part of EleutherAI's GPT-NeoX project. Copyright (c) 2021, EleutherAI. Licensed under the Apache License:
This repository hosts code that is part of EleutherAI's GPT-NeoX project. Copyright (c) 2024, EleutherAI. Licensed under the Apache License:

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
1 change: 1 addition & 0 deletions configs/1-3B.yml
@@ -21,6 +21,7 @@
"scaled_upper_triang_masked_softmax_fusion": false,
"bias_gelu_fusion": false,
"rope_fusion": false,
"layernorm_fusion": false,

# init methods
"init_method": "small_init",
[Diffs for the remaining changed files are not shown.]
