Skip to content

Commit 0bdb35c

Browse files
authored
Merge branch 'main' into int16_linear_support
2 parents 18c9985 + 0329a8a commit 0bdb35c

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

54 files changed

+580
-297
lines changed

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ set -ex
1111

1212
download_ai_lite_core() {
1313
API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
14-
API_KEY="kn10SoSY3hkC-9Qny5TqD2mnqVrlupv3krnjLeBt5cY"
14+
API_KEY=$SAMSUNG_AI_LITECORE_KEY
1515

1616
VERSION="0.5"
1717
OS_NAME="Ubuntu 22.04"

.ci/scripts/test-cuda-build.sh

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@ set -exu
99

1010
CUDA_VERSION=${1:-"12.6"}
1111

12-
echo "=== Testing ExecutorTorch CUDA ${CUDA_VERSION} Build ==="
12+
echo "=== Testing ExecuTorch CUDA ${CUDA_VERSION} Build ==="
1313

14-
# Function to build and test ExecutorTorch with CUDA support
14+
# Function to build and test ExecuTorch with CUDA support
1515
test_executorch_cuda_build() {
1616
local cuda_version=$1
1717

18-
echo "Building ExecutorTorch with CUDA ${cuda_version} support..."
19-
echo "ExecutorTorch will automatically detect CUDA and install appropriate PyTorch wheel"
18+
echo "Building ExecuTorch with CUDA ${cuda_version} support..."
19+
echo "ExecuTorch will automatically detect CUDA and install appropriate PyTorch wheel"
2020

2121
# Check available resources before starting
2222
echo "=== System Information ==="
@@ -27,11 +27,11 @@ test_executorch_cuda_build() {
2727
nvcc --version || echo "nvcc not found"
2828
nvidia-smi || echo "nvidia-smi not found"
2929

30-
# Set CMAKE_ARGS to enable CUDA build - ExecutorTorch will handle PyTorch installation automatically
30+
# Set CMAKE_ARGS to enable CUDA build - ExecuTorch will handle PyTorch installation automatically
3131
export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
3232

33-
echo "=== Starting ExecutorTorch Installation ==="
34-
# Install ExecutorTorch with CUDA support with timeout and error handling
33+
echo "=== Starting ExecuTorch Installation ==="
34+
# Install ExecuTorch with CUDA support with timeout and error handling
3535
timeout 5400 ./install_executorch.sh || {
3636
local exit_code=$?
3737
echo "ERROR: install_executorch.sh failed with exit code: $exit_code"
@@ -41,15 +41,15 @@ test_executorch_cuda_build() {
4141
exit $exit_code
4242
}
4343

44-
echo "SUCCESS: ExecutorTorch CUDA build completed"
44+
echo "SUCCESS: ExecuTorch CUDA build completed"
4545

4646
# Verify the installation
47-
echo "=== Verifying ExecutorTorch CUDA Installation ==="
47+
echo "=== Verifying ExecuTorch CUDA Installation ==="
4848

49-
# Test that ExecutorTorch was built successfully
49+
# Test that ExecuTorch was built successfully
5050
python -c "
5151
import executorch
52-
print('SUCCESS: ExecutorTorch imported successfully')
52+
print('SUCCESS: ExecuTorch imported successfully')
5353
"
5454

5555
# Test CUDA availability and show details
@@ -60,7 +60,7 @@ try:
6060
print('INFO: CUDA available:', torch.cuda.is_available())
6161
6262
if torch.cuda.is_available():
63-
print('SUCCESS: CUDA is available for ExecutorTorch')
63+
print('SUCCESS: CUDA is available for ExecuTorch')
6464
print('INFO: CUDA version:', torch.version.cuda)
6565
print('INFO: GPU device count:', torch.cuda.device_count())
6666
print('INFO: Current GPU device:', torch.cuda.current_device())
@@ -74,16 +74,16 @@ try:
7474
print('SUCCESS: CUDA tensor operation completed on device:', z.device)
7575
print('INFO: Result tensor shape:', z.shape)
7676
77-
print('SUCCESS: ExecutorTorch CUDA integration verified')
77+
print('SUCCESS: ExecuTorch CUDA integration verified')
7878
else:
79-
print('WARNING: CUDA not detected, but ExecutorTorch built successfully')
79+
print('WARNING: CUDA not detected, but ExecuTorch built successfully')
8080
exit(1)
8181
except Exception as e:
82-
print('ERROR: ExecutorTorch CUDA test failed:', e)
82+
print('ERROR: ExecuTorch CUDA test failed:', e)
8383
exit(1)
8484
"
8585

86-
echo "SUCCESS: ExecutorTorch CUDA ${cuda_version} build and verification completed successfully"
86+
echo "SUCCESS: ExecuTorch CUDA ${cuda_version} build and verification completed successfully"
8787
}
8888

8989
# Main execution

.github/scripts/propose_ghstack_orig_pr.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,17 @@ def get_pr_stack_from_number(ref: str, repo: Repository) -> List[int]:
8686
return pr_stack
8787

8888

89+
def get_differential_revision(pr, repo: Repository) -> str:
90+
body = repo.get_pull(pr.number).body
91+
matches = re.findall(r"Differential Revision: .*", body)
92+
count = len(matches)
93+
if count == 1:
94+
        # Exactly one match: return it. (With zero or multiple
95+
        # "Differential Revision" matches we fall through and return
        # empty, so that we can disambiguate manually.)
96+
return matches[0]
97+
return ""
98+
99+
89100
def create_prs_for_orig_branch(pr_stack: List[int], repo: Repository):
90101
# For the first PR, we want to merge to `main` branch, and we will update
91102
# as we go through the stack
@@ -100,13 +111,15 @@ def create_prs_for_orig_branch(pr_stack: List[int], repo: Repository):
100111
# The PR we want to create is then "branch_to_merge" <- gh/user/x/orig
101112
# gh/user/x/orig is the clean diff between gh/user/x/base <- gh/user/x/head
102113
orig_branch_merge_head = pr.base.ref.replace("base", "orig")
114+
differential_revision_text = get_differential_revision(pr, repo)
103115
bot_metadata = f"""This PR was created by the merge bot to help merge the original PR into the main branch.
104116
ghstack PR number: https://github.com/pytorch/executorch/pull/{pr.number} by @{pr.user.login}
105117
^ Please use this as the source of truth for the PR details, comments, and reviews
106118
ghstack PR base: https://github.com/pytorch/executorch/tree/{pr.base.ref}
107119
ghstack PR head: https://github.com/pytorch/executorch/tree/{pr.head.ref}
108120
Merge bot PR base: https://github.com/pytorch/executorch/tree/{orig_branch_merge_base}
109121
Merge bot PR head: https://github.com/pytorch/executorch/tree/{orig_branch_merge_head}
122+
{differential_revision_text}
110123
@diff-train-skip-merge"""
111124

112125
existing_orig_pr = repo.get_pulls(

.github/workflows/pull.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -900,12 +900,14 @@ jobs:
900900
permissions:
901901
id-token: write
902902
contents: read
903+
secrets: inherit
903904
with:
904905
runner: linux.2xlarge
905906
docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
906907
submodules: 'recursive'
907908
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
908909
timeout: 90
910+
secrets-env: SAMSUNG_AI_LITECORE_KEY
909911
script: |
910912
set -ex
911913
@@ -917,6 +919,7 @@ jobs:
917919
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
918920
919921
# Setup Samsung SDK (AI Lite Core) and install enn backend
922+
export SAMSUNG_AI_LITECORE_KEY=$SECRET_SAMSUNG_AI_LITECORE_KEY
920923
source .ci/scripts/setup-samsung-linux-deps.sh
921924
922925
# Test models serially

.github/workflows/test-cuda-builds.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
strategy:
2525
fail-fast: false
2626
matrix:
27-
cuda-version: ["12.6", "12.8", "12.9"]
27+
cuda-version: ["12.6", "12.8", "13.0"]
2828

2929
name: test-executorch-cuda-build-${{ matrix.cuda-version }}
3030
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

CONTRIBUTING.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,7 @@ We use [`lintrunner`](https://pypi.org/project/lintrunner/) to help make sure th
199199
code follows our standards. Set it up with:
200200

201201
```
202-
pip install lintrunner==0.12.7
203-
pip install lintrunner-adapters==0.12.4
202+
./install_requirements.sh # (automatically run by install_executorch.sh)
204203
lintrunner init
205204
```
206205

backends/cortex_m/CMakeLists.txt

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ set(CMSIS_NN_LOCAL_PATH
3636
# library is downloaded via FetchContent in the default/regular case.
3737
if(CMSIS_NN_LOCAL_PATH AND EXISTS "${CMSIS_NN_LOCAL_PATH}")
3838
message(STATUS "Using CMSIS-NN from specified path: ${CMSIS_NN_LOCAL_PATH}")
39-
add_subdirectory(${CMSIS_NN_LOCAL_PATH} cmsis_nn_build)
39+
add_subdirectory(${CMSIS_NN_LOCAL_PATH} _deps/cmsis_nn-build)
4040
else()
4141
# Use FetchContent with automatic fallback
4242
message(STATUS "Using CMSIS-NN via FetchContent")
@@ -48,23 +48,7 @@ else()
4848
GIT_SHALLOW TRUE
4949
)
5050

51-
FetchContent_GetProperties(cmsis_nn)
52-
if(NOT cmsis_nn_POPULATED)
53-
FetchContent_Populate(cmsis_nn)
54-
add_subdirectory(${cmsis_nn_SOURCE_DIR} ${cmsis_nn_BINARY_DIR})
55-
endif()
56-
endif()
57-
58-
# Add MVEI define to cmsis-nn target
59-
if(TARGET cmsis-nn)
60-
target_compile_definitions(cmsis-nn PUBLIC ARM_MATH_MVEI=1)
61-
get_target_property(CMSIS_NN_INCLUDES cmsis-nn INTERFACE_INCLUDE_DIRECTORIES)
62-
message(STATUS "CMSIS-NN include dirs: ${CMSIS_NN_INCLUDES}")
63-
else()
64-
message(
65-
FATAL_ERROR
66-
"CMSIS-NN target not found. Check your CMSIS_NN_LOCAL_PATH or network connection."
67-
)
51+
FetchContent_MakeAvailable(cmsis_nn)
6852
endif()
6953

7054
# Cortex-M ops kernel sources

backends/cortex_m/test/test_quantize_op_fusion_pass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def forward(self, x, y):
313313
# Apply passes
314314
transformed_program = self._apply_passes(edge_program)
315315

316-
# Generate ExecutorTorch program
316+
# Generate ExecuTorch program
317317
executorch_program = transformed_program.to_executorch()
318318

319319
# Verify the program contains the expected fused operator

backends/nxp/backend/edge_program_converter.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
134134

135135
qdq_related_functions = [
136136
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
137+
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
137138
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
138139
]
139140

@@ -203,7 +204,8 @@ def _convert_qdq_cluster_q_dq_nodes(
203204
:param conversion_context: ConversionContext instance.
204205
"""
205206
qdq_q_ops_converters = {
206-
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: QDQDequantizeConverter, # noqa F405
207+
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: QDQPerTensorDequantizeConverter, # noqa F405
208+
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default: QDQPerChannelDequantizeConverter, # noqa F405
207209
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: QDQQuantizeConverter, # noqa F405
208210
}
209211

backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
PermuteCopyConverter,
4242
)
4343
from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.qdq_dequantize_converter import (
44-
QDQDequantizeConverter,
44+
QDQPerChannelDequantizeConverter,
45+
QDQPerTensorDequantizeConverter,
4546
)
4647
from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.qdq_quantize_converter import (
4748
QDQQuantizeConverter,
@@ -70,7 +71,8 @@
7071
"PermuteCopyConverter",
7172
"SoftmaxConverter",
7273
"ViewCopyConverter",
73-
"QDQDequantizeConverter",
74+
"QDQPerTensorDequantizeConverter",
75+
"QDQPerChannelDequantizeConverter",
7476
"QDQQuantizeConverter",
7577
"ConstantPadNDConverter",
7678
"ReLUConverter",

0 commit comments

Comments (0)