From 76ce6c7548bc66b69fc04636cb374838a0852a91 Mon Sep 17 00:00:00 2001
From: Bill Teng <135061747+TT-billteng@users.noreply.github.com>
Date: Tue, 14 Jan 2025 16:21:10 -0800
Subject: [PATCH] remove references to LFS (#16722)

### Tickets
Closes https://github.com/tenstorrent/tt-metal/issues/2723
Closes https://github.com/tenstorrent/tt-metal/issues/4435

### Problem description
We get charged $$$$ for LFS usage, LFS clones have introduced intermittent failures in CI, and LFS adds complexity to our install process.

### What's changed
Goodbye LFS

### Checklist
- [x] Post commit CI passes
- [ ] Blackhole Post commit (if applicable)
- [ ] Model regression CI testing passes (if applicable)
- [ ] Device performance regression CI testing passes (if applicable)
- [ ] **(For models and ops writers)** Full [new models](https://github.com/tenstorrent/tt-metal/actions/workflows/full-new-models-suite.yaml) tests pass
- [ ] New/Existing tests provide coverage for changes
---
 .../checkout-with-submodule-lfs/action.yml    |   7 +-
 .../install-metal-dev-deps/dependencies.json  |   4 +-
 .gitignore                                    |   3 -
 .gitmodules                                   |   3 -
 INSTALLING.md                                 |  12 +-
 install_dependencies.sh                       |   1 -
 models/demos/t3000/llama3_70b/setup_llama.sh  |   1 -
 .../synthetic_gradients/tt/sg_mnist.py        | 249 ------------------
 scripts/docker/requirements-20.04.txt         |   1 -
 scripts/docker/requirements-22.04.txt         |   1 -
 .../workflows/builld_and_test_all.yaml        |   6 +-
 tt-train/init_repo.sh                         |   1 -
 tt_metal/third_party/lfs                      |   1 -
 13 files changed, 6 insertions(+), 284 deletions(-)
 delete mode 100644 models/experimental/synthetic_gradients/tt/sg_mnist.py
 delete mode 160000 tt_metal/third_party/lfs

diff --git a/.github/actions/checkout-with-submodule-lfs/action.yml b/.github/actions/checkout-with-submodule-lfs/action.yml
index bda1986295a..e10d1cad0e6 100644
--- a/.github/actions/checkout-with-submodule-lfs/action.yml
+++ b/.github/actions/checkout-with-submodule-lfs/action.yml
@@ -1,4 +1,4 @@
-name: "Checkout recursively with submodules and LFS"
+name: "Checkout recursively with submodules"
 description: "Installs tt-metal extra dev dependencies on ubuntu-20.04 GitHub Actions runners"
 
 inputs:
@@ -27,12 +27,9 @@ runs:
     with:
       token: ${{ inputs.token }}
       fetch-depth: ${{ inputs.fetch-depth }}
-      lfs: true
+      lfs: false
       submodules: recursive
       clean: true
   - name: Clean each submodule
     shell: bash
     run: git submodule foreach 'git clean -xffd'
-  - name: Fetch and pull LFS objects for each submodule
-    shell: bash
-    run: git submodule foreach 'git lfs fetch && git lfs pull'
diff --git a/.github/actions/install-metal-dev-deps/dependencies.json b/.github/actions/install-metal-dev-deps/dependencies.json
index df22ed2607a..3c7353c72dd 100644
--- a/.github/actions/install-metal-dev-deps/dependencies.json
+++ b/.github/actions/install-metal-dev-deps/dependencies.json
@@ -1,15 +1,13 @@
 {
-  "_comments": "We do not include git and git-lfs here along with the PPA command 'sudo add-apt-repository ppa:git-core/ppa' for registering the apt repository to install those packages because we were timing out the GPG fetching by hammering it",
+  "_comments": "We do not include git here along with the PPA command 'sudo add-apt-repository ppa:git-core/ppa' for registering the apt repository to install those packages because we were timing out the GPG fetching by hammering it",
   "ubuntu-20.04": [
     "git",
-    "git-lfs",
     "pandoc",
     "pkg-config",
     "ninja-build"
   ],
   "ubuntu-22.04": [
     "git",
-    "git-lfs",
     "pandoc",
     "pkg-config",
     "ninja-build",
diff --git a/.gitignore b/.gitignore
index 39973cf0c43..39124f4dafb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -92,9 +92,6 @@ venv/
 
 # releases
 dist
 
-# exclude dtx generated files
-tt_metal/third_party/lfs/dtx_transform_outputs
-
 #exclude reports dir
 .reports/*
diff --git a/.gitmodules b/.gitmodules
index 4029f9918d6..06109bb0702 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,3 @@
-[submodule "third_party/lfs"]
-	path = tt_metal/third_party/lfs
-	url = https://github.com/tenstorrent-metal/lfs.git
 [submodule "tt_metal/third_party/taskflow"]
 	path = tt_metal/third_party/taskflow
 	url = https://github.com/taskflow/taskflow
diff --git a/INSTALLING.md b/INSTALLING.md
index 5a20ff2de87..d68c75c8c96 100644
--- a/INSTALLING.md
+++ b/INSTALLING.md
@@ -48,21 +48,13 @@ sudo ./install_dependencies.sh
 
 > If you do not want to use the models or follow the tutorials and want to
 > immediately start using the API, you may install just the wheel or get the release Docker container.
 
-1. Install git and git-lfs.
-
-```sh
-sudo apt install git git-lfs
-```
-
-2. Clone the repo.
+1. Clone the repo.
 
 ```sh
 git clone https://github.com/tenstorrent/tt-metal.git --recurse-submodules
-cd tt-metal
-git submodule foreach 'git lfs fetch --all && git lfs pull'
 ```
 
-3. Install either from source, or from our release wheel. Note that if you are
+2. Install either from source, or from our release wheel. Note that if you are
 going to try using the model demos, we highly recommend you install from source.
 
diff --git a/install_dependencies.sh b/install_dependencies.sh
index 42b31ccfb54..f6bac852e38 100755
--- a/install_dependencies.sh
+++ b/install_dependencies.sh
@@ -54,7 +54,6 @@ ub_package_list() {
 
   UB_LIST=(\
     git \
-    git-lfs \
    build-essential \
     cmake \
     software-properties-common \
diff --git a/models/demos/t3000/llama3_70b/setup_llama.sh b/models/demos/t3000/llama3_70b/setup_llama.sh
index 0c6a7b52375..5b419d6757b 100644
--- a/models/demos/t3000/llama3_70b/setup_llama.sh
+++ b/models/demos/t3000/llama3_70b/setup_llama.sh
@@ -155,7 +155,6 @@ check_and_build_tt_metal() {
     if [[ ! -d "python_env" ]]; then
         git checkout "${TT_METAL_COMMIT_SHA_OR_TAG}"
         git submodule update --init --recursive
-        git submodule foreach 'git lfs fetch --all && git lfs pull'
         ./build_metal.sh
         ./create_venv.sh
         source python_env/bin/activate
diff --git a/models/experimental/synthetic_gradients/tt/sg_mnist.py b/models/experimental/synthetic_gradients/tt/sg_mnist.py
deleted file mode 100644
index 6b52c18fd55..00000000000
--- a/models/experimental/synthetic_gradients/tt/sg_mnist.py
+++ /dev/null
@@ -1,249 +0,0 @@
-# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
-
-# SPDX-License-Identifier: Apache-2.0
-
-
-from pathlib import Path
-
-import torch
-import ttnn
-from torch import nn
-from torch.utils.data import DataLoader
-from torchvision import transforms, datasets
-
-import ttnn
-from models.utility_functions import (
-    pad_activation,
-    pad_weight,
-    tilize_to_list,
-    get_oom_of_float,
-    is_close,
-)
-
-input_dim = 1024
-hidden_dim = 256
-output_dim = 10
-batch_size = 1
-eps = 1e-3
-
-
-class TtMnistModel(nn.Module):
-    def __init__(self, state_dict):
-        super().__init__()
-
-        # Extract params from state dict
-        fc1_weight = pad_weight(state_dict["linear1.weight"])
-        fc1_bias = pad_weight(state_dict["linear1.bias"])
-
-        fc2_weight = pad_weight(state_dict["linear2.weight"])
-        fc2_bias = pad_weight(state_dict["linear2.bias"])
-
-        fc3_weight = pad_weight(state_dict["linear3.weight"])
-        fc3_bias = pad_weight(state_dict["linear3.bias"])
-
-        # Get shapes
-        fc1_weight_shape = fc1_weight.shape
-        fc2_weight_shape = fc2_weight.shape
-        fc3_weight_shape = fc3_weight.shape
-
-        # Tilize params
-        fc1_weight = tilize_to_list(fc1_weight)
-        fc1_bias = tilize_to_list(fc1_bias)
-
-        fc2_weight = tilize_to_list(fc2_weight)
-        fc2_bias = tilize_to_list(fc2_bias)
-
-        fc3_weight = tilize_to_list(fc3_weight)
-        fc3_bias = tilize_to_list(fc3_bias)
-
-        # self.lin1 = TtLinear(*fc1_weight_shape[-2:], fc1_weight, fc1_bias, device)
-        # self.lin2 = TtLinear(*fc2_weight_shape[-2:], fc2_weight, fc2_bias, device)
-        # self.lin3 = TtLinear(*fc3_weight_shape[-2:], fc3_weight, fc3_bias, device)
-
-        self.lin1 = TtLinear(800, 256, fc1_weight, fc1_bias, device)
-        self.lin2 = TtLinear(256, 256, fc2_weight, fc2_bias, device)
-        self.lin3 = TtLinear(256, 32, fc3_weight, fc3_bias, device)
-
-        # Extract batch norm params from state dict
-        gamma1 = pad_weight(state_dict["batchnorm1d_1.weight"])
-        beta1 = pad_weight(state_dict["batchnorm1d_1.bias"])
-
-        gamma2 = pad_weight(state_dict["batchnorm1d_2.weight"])
-        beta2 = pad_weight(state_dict["batchnorm1d_2.bias"])
-
-        gamma3 = pad_weight(state_dict["batchnorm1d_3.weight"])
-        beta3 = pad_weight(state_dict["batchnorm1d_3.bias"])
-
-        # Get shapes
-
-        gamma1_shape = gamma1.shape[3]
-        gamma2_shape = gamma2.shape[3]
-        gamma3_shape = gamma3.shape[3]
-
-        # Tilize params
-        gamma1 = tilize_to_list(gamma1)
-        beta1 = tilize_to_list(beta1)
-
-        gamma2 = tilize_to_list(gamma2)
-        beta2 = tilize_to_list(beta2)
-
-        gamma3 = tilize_to_list(gamma3)
-        beta3 = tilize_to_list(beta3)
-
-        ### running mean and var
-        running_mean1 = pad_weight(state_dict["batchnorm1d_1.running_mean"])
-        running_var1 = pad_weight(state_dict["batchnorm1d_1.running_var"])
-
-        running_mean2 = pad_weight(state_dict["batchnorm1d_2.running_mean"])
-        running_var2 = pad_weight(state_dict["batchnorm1d_2.running_var"])
-
-        running_mean3 = pad_weight(state_dict["batchnorm1d_3.running_mean"])
-        running_var3 = pad_weight(state_dict["batchnorm1d_3.running_var"])
-
-        # Get shapes
-        print(
-            "running mean size before padding:",
-            state_dict["batchnorm1d_1.running_mean"].shape,
-        )
-        print("running mean size after padding:", running_mean1.shape)
-
-        running_mean1_shape = running_mean1.shape
-        running_mean2_shape = running_mean2.shape
-        running_mean3_shape = running_mean3.shape
-
-        # Tilize params
-        running_mean1 = tilize_to_list(running_mean1)
-        running_var1 = tilize_to_list(running_var1)
-
-        running_mean2 = tilize_to_list(running_mean2)
-        running_var2 = tilize_to_list(running_var2)
-
-        running_mean3 = tilize_to_list(running_mean3)
-        running_var3 = tilize_to_list(running_var3)
-
-        ### defining batch norms
-        self.batchnorm1d_1 = batchnorm1d_inference(
-            gamma1, beta1, running_mean1, running_var1, eps, gamma1_shape, device
-        )
-        self.batchnorm1d_2 = batchnorm1d_inference(
-            gamma2, beta2, running_mean2, running_var2, eps, gamma2_shape, device
-        )
-        self.batchnorm1d_3 = batchnorm1d_inference(
-            gamma3, beta3, running_mean3, running_var3, eps, gamma3_shape, device
-        )
-
-        self.TtRelu = ttnn.relu
-
-    # tt forwrd
-    def forward(self, X):
-        x, labels = X
-
-        # Flatten tensor
-        x = x.view(x.shape[0], -1)
-
-        # Pad to tile
-        x = pad_activation(x)
-        x_ = tilize_to_list(x)
-
-        # x is a pytorch tensor,... need to convert to a buda tensor
-        inp = ttnn.Tensor(x_, x.shape, ttnn.bfloat16, ttnn.TILE_LAYOUT, device)
-        # breakpoint()
-        lin1_out = self.lin1(inp)
-        bn1_out = self.batchnorm1d_1(lin1_out)
-        relu1_out = self.TtRelu(lin1_out)
-
-        lin2_out = self.lin2(relu1_out)
-        bn2_out = self.batchnorm1d_2(lin2_out)
-        relu2_out = self.TtRelu(lin2_out)
-
-        lin3_out = self.lin3(relu2_out)
-        bn3_out = self.batchnorm1d_3(lin3_out)
-        relu3_out = self.TtRelu(lin3_out)
-
-        # Softmax on CPU
-        lin3_out_cpu = relu3_out.cpu()
-
-        # Make pytorch tensor... since we had to pad the output, we need
-        # to only retrieve the 10 values that represent actual classes
-        lin3_out_cpu_pytorch = torch.Tensor(lin3_out_cpu.to_torch())[:, 0, 0, :10]
-        out = nn.functional.softmax(lin3_out_cpu_pytorch)
-
-        return out
-
-
-class PytorchMnistModel(nn.Module):
-    def __init__(self, input_dim, hidden_dim, output_dim, state_dict):
-        super(PytorchMnistModel, self).__init__()
-
-        self.linear1 = nn.Linear(input_dim, hidden_dim)
-        self.batchnorm1d_1 = nn.BatchNorm1d(hidden_dim)
-        self.relu1 = nn.ReLU()
-
-        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
-        self.batchnorm1d_2 = nn.BatchNorm1d(hidden_dim)
-        self.relu2 = nn.ReLU()
-
-        self.linear3 = nn.Linear(hidden_dim, output_dim)
-        self.batchnorm1d_3 = nn.BatchNorm1d(output_dim)
-        self.relu3 = nn.ReLU()
-
-        self.load_state_dict(state_dict)
-
-    def forward(self, X):
-        x, labels = X
-        x = x.view(x.shape[0], -1)
-
-        lin1_out = self.linear1(x)
-        bn1_out = self.batchnorm1d_1(lin1_out)
-        relu1_out = self.relu1(bn1_out)
-
-        lin2_out = self.linear2(relu1_out)
-        bn2_out = self.batchnorm1d_2(lin2_out)
-        relu2_out = self.relu1(bn2_out)
-
-        lin3_out = self.linear3(relu2_out)
-        bn3_out = self.batchnorm1d_3(lin3_out)
-        relu3_out = self.relu3(bn3_out)
-
-        out = nn.functional.softmax(relu3_out)
-
-        return out
-
-
-def run_mnist_inference():
-    # Data preprocessing/loading
-    transform = transforms.Compose([transforms.ToTensor()])
-    test_dataset = datasets.MNIST(root="data", train=False, transform=transform, download=True)
-    dataloader = DataLoader(test_dataset, batch_size=batch_size)
-
-    # Trained to 63% accuracy
-    state_dict = torch.load(f"{Path(__file__).parent}/lfs/synthetic_grads/bn1d_32.pt")
-
-    tt_mnist_model = TtMnistModel(state_dict)
-    pytorch_mnist_model = PytorchMnistModel(input_dim, hidden_dim, output_dim, state_dict)
-    pytorch_mnist_model.eval()
-
-    first_input = next(iter(dataloader))
-
-    # Run one input through the network
-    tt_out = tt_mnist_model(first_input)
-    pytorch_out = pytorch_mnist_model(first_input)
-
-    print("tt_out:", tt_out)
-    print("pytorch_out:", pytorch_out)
-    # assert (tt_out.topk(10).indices == pytorch_out.topk(10).indices).all(), "The outputs from device and pytorch must have the same topk indices"
-    print("tt out topk:", tt_out.topk(10))
-    print("pytorch out topk:", pytorch_out.topk(10))
-
-    # Check that the scale of each output is the same
-    tt_out_oom = get_oom_of_float(tt_out.tolist()[0])
-    pytorch_out_oom = get_oom_of_float(pytorch_out.tolist()[0])
-
-    close_or_far = is_close(pytorch_out, tt_out)
-    print("close or far?", close_or_far)
-    # breakpoint()
-    # assert tt_out_oom == pytorch_out_oom, "The order of magnitudes of the outputs must be the same"
-
-
-def test_run_mnist_inference(device):
-    run_mnist_inference()
diff --git a/scripts/docker/requirements-20.04.txt b/scripts/docker/requirements-20.04.txt
index d5401cc5bfe..fc6895f2dd8 100644
--- a/scripts/docker/requirements-20.04.txt
+++ b/scripts/docker/requirements-20.04.txt
@@ -3,7 +3,6 @@ dialog
 software-properties-common=0.99.9.12
 build-essential=12.8ubuntu1.1
 git
-git-lfs
 pandoc
 libtbb-dev
 libcapstone-dev
diff --git a/scripts/docker/requirements-22.04.txt b/scripts/docker/requirements-22.04.txt
index 3800f75f030..6038b4e8f6a 100644
--- a/scripts/docker/requirements-22.04.txt
+++ b/scripts/docker/requirements-22.04.txt
@@ -4,7 +4,6 @@ build-essential
 gcc-12
 g++-12
 git
-git-lfs
 pandoc
 libtbb-dev
 libcapstone-dev
diff --git a/tt-train/.github/workflows/builld_and_test_all.yaml b/tt-train/.github/workflows/builld_and_test_all.yaml
index 57f73d62cea..feafd8fd8c6 100644
--- a/tt-train/.github/workflows/builld_and_test_all.yaml
+++ b/tt-train/.github/workflows/builld_and_test_all.yaml
@@ -18,11 +18,7 @@ jobs:
       - uses: actions/checkout@v4
         with:
           submodules: recursive
-          lfs: true
-
-      - name: LFS pull
-        run: |
-          git submodule foreach --recursive git lfs pull
+          lfs: false
 
       # actions/checkout runs `git clean -ffdx && git reset --hard HEAD` before fetching
       # but `build`, `build_Release`, `built` (contains compiled kernels) dirs are not removed because they are in .gitignore
diff --git a/tt-train/init_repo.sh b/tt-train/init_repo.sh
index 80737f089f8..ec660ee9ca7 100755
--- a/tt-train/init_repo.sh
+++ b/tt-train/init_repo.sh
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # Description: Initialize the repository with the necessary configurations
 
-git lfs install
 sudo apt install clang-tidy-17
 sudo apt install clang-format-17
 sudo ln -sf /usr/bin/clang-tidy-17 /usr/bin/clang-tidy
diff --git a/tt_metal/third_party/lfs b/tt_metal/third_party/lfs
deleted file mode 160000
index e82667eb9bd..00000000000
--- a/tt_metal/third_party/lfs
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit e82667eb9bdd42bcd9a0fe256e0081563624772a
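
Note for developers with existing clones (not part of the patch itself): after pulling this change, the old LFS hooks and the removed `tt_metal/third_party/lfs` submodule can linger in your local checkout. A minimal cleanup sketch, assuming git-lfs had previously been installed in the clone:

```sh
# Remove the LFS hooks and filter config from this clone only
# (safe if git-lfs was never set up here).
git lfs uninstall --local

# Deregister the deleted submodule and refresh the remaining ones.
# deinit errors once the submodule is gone from .gitmodules, hence || true.
git submodule deinit -f tt_metal/third_party/lfs || true
rm -rf .git/modules/tt_metal/third_party/lfs
git submodule sync --recursive
git submodule update --init --recursive
```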
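To spot-check that nothing in the tree still depends on LFS after this patch (a hypothetical verification step, not wired into CI), both commands below should print nothing:

```sh
# No LFS filters should remain in any .gitattributes file.
git grep -n "filter=lfs" -- '*.gitattributes'

# No lingering git-lfs invocations in scripts, workflows, or docs.
git grep -nI -e "git lfs" -e "git-lfs"
```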