Skip to content

Commit 36dc78b

Browse files
authored
Merge branch 'main' into nlp/cache-start-pos
2 parents 1a3af81 + 3ce9c8e commit 36dc78b

File tree

2 files changed

+34
-44
lines changed

2 files changed

+34
-44
lines changed

Diff for: .github/workflows/run-readme-pr-linuxaarch64.yml

+32 -42
Original file line number | Diff line number | Diff line change
@@ -9,22 +9,20 @@ on:
99

1010
jobs:
1111
test-readme-cpu:
12-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
12+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
13+
permissions:
14+
id-token: write
15+
contents: read
1316
with:
14-
runner: linux-aarch64
15-
gpu-arch-type: cuda
16-
gpu-arch-version: "12.1"
17+
runner: linux.arm64.2xlarge
18+
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
19+
gpu-arch-type: cpu-aarch64
1720
timeout: 60
1821
script: |
1922
echo "::group::Print machine info"
2023
uname -a
2124
echo "::endgroup::"
2225
23-
echo "::group::Install newer objcopy that supports --set-section-alignment"
24-
yum install -y devtoolset-10-binutils
25-
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
26-
echo "::endgroup::"
27-
2826
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs readme
2927
3028
echo "::group::Completion"
@@ -33,41 +31,37 @@ jobs:
3331
echo "::endgroup::"
3432
3533
test-quantization-cpu:
36-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
34+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
35+
permissions:
36+
id-token: write
37+
contents: read
3738
with:
38-
runner: linux-aarch64
39-
gpu-arch-type: cuda
40-
gpu-arch-version: "12.1"
39+
runner: linux.arm64.2xlarge
40+
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
41+
gpu-arch-type: cpu-aarch64
4142
timeout: 60
4243
script: |
4344
echo "::group::Print machine info"
4445
uname -a
4546
echo "::endgroup::"
4647
47-
echo "::group::Install newer objcopy that supports --set-section-alignment"
48-
yum install -y devtoolset-10-binutils
49-
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
50-
echo "::endgroup::"
51-
5248
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization
5349
5450
test-gguf-cpu:
5551
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
52+
permissions:
53+
id-token: write
54+
contents: read
5655
with:
57-
runner: linux-aarch64
58-
gpu-arch-type: cuda
59-
gpu-arch-version: "12.1"
56+
runner: linux.arm64.2xlarge
57+
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
58+
gpu-arch-type: cpu-aarch64
6059
timeout: 60
6160
script: |
6261
echo "::group::Print machine info"
6362
uname -a
6463
echo "::endgroup::"
6564
66-
echo "::group::Install newer objcopy that supports --set-section-alignment"
67-
yum install -y devtoolset-10-binutils
68-
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
69-
echo "::endgroup::"
70-
7165
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf
7266
7367
echo "::group::Completion"
@@ -77,21 +71,19 @@ jobs:
7771
7872
test-advanced-cpu:
7973
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
74+
permissions:
75+
id-token: write
76+
contents: read
8077
with:
81-
runner: linux-aarch64
82-
gpu-arch-type: cuda
83-
gpu-arch-version: "12.1"
78+
runner: linux.arm64.2xlarge
79+
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
80+
gpu-arch-type: cpu-aarch64
8481
timeout: 60
8582
script: |
8683
echo "::group::Print machine info"
8784
uname -a
8885
echo "::endgroup::"
8986
90-
echo "::group::Install newer objcopy that supports --set-section-alignment"
91-
yum install -y devtoolset-10-binutils
92-
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
93-
echo "::endgroup::"
94-
9587
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced
9688
9789
echo "::group::Completion"
@@ -101,21 +93,19 @@ jobs:
10193
10294
test-evaluation-cpu:
10395
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
96+
permissions:
97+
id-token: write
98+
contents: read
10499
with:
105-
runner: linux-aarch64
106-
gpu-arch-type: cuda
107-
gpu-arch-version: "12.1"
100+
runner: linux.arm64.2xlarge
101+
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
102+
gpu-arch-type: cpu-aarch64
108103
timeout: 60
109104
script: |
110105
echo "::group::Print machine info"
111106
uname -a
112107
echo "::endgroup::"
113108
114-
echo "::group::Install newer objcopy that supports --set-section-alignment"
115-
yum install -y devtoolset-10-binutils
116-
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
117-
echo "::endgroup::"
118-
119109
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs evaluation
120110
121111
echo "::group::Completion"

Diff for: torchchat/generate.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -685,7 +685,7 @@ def generate(
685685
sequential_prefill=True,
686686
callback=lambda x: x,
687687
max_seq_length: int,
688-
attention_backend: str = "math",
688+
attention_backend: SDPBackend = torch.nn.attention.SDPBackend.MATH,
689689
seed: Optional[int] = None,
690690
**sampling_kwargs,
691691
) -> torch.Tensor:
@@ -1126,7 +1126,7 @@ def chat(
11261126
messages_to_encode.append(
11271127
{"role": "system", "content": self.system_prompt}
11281128
)
1129-
messages_to_encode.append({"role": "system", "content": prompt})
1129+
messages_to_encode.append({"role": "user", "content": prompt})
11301130
encoded = self.chat_formatter.encode_dialog_prompt(
11311131
messages_to_encode, add_generation_prompt=True,
11321132
)

0 commit comments

Comments
 (0)