From 3ee6a0f92c2cfad1e77c9a31d35604e8affdf6f9 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Fri, 31 Jan 2025 00:32:54 -0800 Subject: [PATCH 1/4] Update pull.yml aoti on gpu with all data types. (Might need some tweaking on the python to aoti code transition?) --- .github/workflows/pull.yml | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 5dbafee9f..632770450 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -291,6 +291,16 @@ jobs: bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16" echo "::endgroup::" + echo "::group::Run inference with quantize file" + if [ $(uname -s) != Darwin ]; then + python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + + python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ + + fi + echo "::endgroup::" + test-gpu-aoti-float32: permissions: id-token: write @@ -331,7 +341,11 @@ jobs: echo "::group::Run inference with quantize file" if [ $(uname -s) != Darwin ]; then - python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + + python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py generate --aoti-package-path 
/tmp/model.pt2 --dtype float32--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ + fi echo "::endgroup::" @@ -374,9 +388,9 @@ jobs: echo "::endgroup::" echo "::group::Run inference with quantize file" - if [ $(uname -s) == Darwin ]; then - python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" - python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ + if [ $(uname -s) != Darwin ]; then + python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16--checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ fi echo "::endgroup::" From 38e69e84f1bb1800a0c8b6dd249502663d14b95f Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:50:57 -0800 Subject: [PATCH 2/4] Update pull.yml Fixed typos Use gs=32 (padding was apparently disabled, so users will have to get everything "just right". Not the UX I recommend.) 
--- .github/workflows/pull.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 632770450..e64454c9a 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -295,8 +295,8 @@ jobs: if [ $(uname -s) != Darwin ]; then python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" - python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" - python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ + python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ fi echo "::endgroup::" @@ -341,9 +341,9 @@ jobs: echo "::group::Run inference with quantize file" if [ $(uname -s) != Darwin ]; then - python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py generate --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" - python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype 
float32--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ fi @@ -389,7 +389,7 @@ jobs: echo "::group::Run inference with quantize file" if [ $(uname -s) != Darwin ]; then - python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --dtype float16--checkpoint "./checkpoints/${REPO_NAME}/model.pth" + python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ fi echo "::endgroup::" From f7f6dad015b06d54dc1fb215786006ec5347beaa Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Thu, 6 Feb 2025 00:32:11 -0800 Subject: [PATCH 3/4] Update pull.yml --- .github/workflows/pull.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index e64454c9a..73381e1b5 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -390,7 +390,7 @@ jobs: echo "::group::Run inference with quantize file" if [ $(uname -s) != Darwin ]; then python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" - python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ + python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" fi echo "::endgroup::" From caab0a7647b2159763ef68ab7157f48364f9e5c9 Mon Sep 17 00:00:00 2001 From: Michael Gschwind <61328285+mikekgfb@users.noreply.github.com> Date: Sat, 8 Feb 2025 13:19:55 -0800 Subject: [PATCH 4/4] Update pull.yml fix typo --- 
.github/workflows/pull.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 73381e1b5..bca312a33 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -296,7 +296,7 @@ jobs: python3 torchchat.py generate --quantize torchchat/quant_config/cuda.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" - python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ + python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" fi echo "::endgroup::" @@ -344,7 +344,7 @@ jobs: python3 torchchat.py generate --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda-32.json --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" - python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32--checkpoint "./checkpoints/${REPO_NAME}/model.pth"~ + python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" fi echo "::endgroup::"