diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index 44632703e32..ccee7739dc6 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-0a038cf0cff2d071b7359ac0491fd2ba7798a438
+b1984237a0fb32b760c1b84d6d02d2f0f7ed293b
diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt
index e6cd356e0e0..3a0cd57ddb5 100644
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@@ -6,7 +6,7 @@ sympy==1.12
timm==0.6.13
tomli==2.0.1
torchsr==1.0.4
-transformers==4.36.0
+transformers==4.38.0
zstd==1.5.5.1
pytest==7.2.0
pytest-cov==4.1.0
diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh
index 04a9ee227da..eb1221620c1 100644
--- a/.ci/scripts/build_llama_android.sh
+++ b/.ci/scripts/build_llama_android.sh
@@ -26,6 +26,7 @@ install_executorch_and_backend_lib() {
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
+ -DEXECUTORCH_BUILD_QUANTIZED=ON \
-DXNNPACK_ENABLE_ARM_BF16=OFF \
-Bcmake-android-out .
diff --git a/.ci/scripts/test.sh b/.ci/scripts/test.sh
index 2d915506158..c29c09dc63d 100755
--- a/.ci/scripts/test.sh
+++ b/.ci/scripts/test.sh
@@ -37,7 +37,7 @@ build_cmake_executor_runner() {
(rm -rf ${CMAKE_OUTPUT_DIR} \
&& mkdir ${CMAKE_OUTPUT_DIR} \
&& cd ${CMAKE_OUTPUT_DIR} \
- && retry cmake -DBUCK2=buck2 -DCMAKE_BUILD_TYPE=Release \
+ && retry cmake -DCMAKE_BUILD_TYPE=Release \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
cmake --build ${CMAKE_OUTPUT_DIR} -j4
@@ -84,8 +84,7 @@ build_cmake_xnn_executor_runner() {
(rm -rf ${CMAKE_OUTPUT_DIR} \
&& mkdir ${CMAKE_OUTPUT_DIR} \
&& cd ${CMAKE_OUTPUT_DIR} \
- && retry cmake -DBUCK2=buck2 \
- -DCMAKE_BUILD_TYPE=Release \
+ && retry cmake -DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 90ea13281ba..94528613e33 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -12,7 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
MODEL_NAME=$1 # stories110M.pt
BUILD_TOOL=$2 # buck2 or cmake
DTYPE=$3 # fp16 or fp32
-MODE=${4:-"xnnpack"} # portable or xnnpack
+MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
echo "Expecting atleast 4 positional arguments"
echo "Usage: [...]"
@@ -37,6 +37,24 @@ if [[ -z "${MODE:-}" ]]; then
exit 1
fi
+if [[ "${MODE}" =~ .*xnnpack.* ]]; then
+ XNNPACK=ON
+else
+ XNNPACK=OFF
+fi
+
+if [[ "${MODE}" =~ .*custom.* ]]; then
+ CUSTOM=ON
+else
+ CUSTOM=OFF
+fi
+
+if [[ "${MODE}" =~ .*qe.* ]]; then
+ QE=ON
+else
+ QE=OFF
+fi
+
if [[ -z "${BUCK:-}" ]]; then
BUCK=buck2
fi
@@ -47,25 +65,21 @@ fi
which "${PYTHON_EXECUTABLE}"
-
cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
- if [[ "${MODE}" == "xnnpack" ]]; then
- XNNPACK=ON
- else
- XNNPACK=OFF
- fi
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
- -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_BUILD_TYPE=Debug \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+ -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
+ -DEXECUTORCH_BUILD_QUANTIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
- cmake --build cmake-out -j9 --target install --config Release
+ cmake --build cmake-out -j9 --target install --config Debug
}
cmake_build_llama_runner() {
@@ -73,12 +87,14 @@ cmake_build_llama_runner() {
dir="examples/models/llama2"
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
- -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_BUILD_TYPE=Debug \
+ -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
+ -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out/${dir} \
${dir}
- cmake --build cmake-out/${dir} -j9 --config Release
+ cmake --build cmake-out/${dir} -j9 --config Debug
}
@@ -116,10 +132,17 @@ fi
# Export model.
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
-EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
-if [[ "${MODE}" == "xnnpack" ]]; then
- EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
+EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
+if [[ "${XNNPACK}" == "ON" ]]; then
+ EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
+fi
+if [[ "${CUSTOM}" == "ON" ]]; then
+ EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
+fi
+if [[ "${QE}" == "ON" ]]; then
+ EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
fi
+# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
# Create tokenizer.bin.
diff --git a/.ci/scripts/test_quantized_aot_lib.sh b/.ci/scripts/test_quantized_aot_lib.sh
index ed9c789c5e4..610144f80d2 100755
--- a/.ci/scripts/test_quantized_aot_lib.sh
+++ b/.ci/scripts/test_quantized_aot_lib.sh
@@ -21,10 +21,9 @@ build_cmake_quantized_aot_lib() {
(rm -rf ${CMAKE_OUTPUT_DIR} \
&& mkdir ${CMAKE_OUTPUT_DIR} \
&& cd ${CMAKE_OUTPUT_DIR} \
- && retry cmake -DBUCK2=buck2 \
- -DCMAKE_BUILD_TYPE=Release \
+ && retry cmake -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
- -DEXECUTORCH_BUILD_QUANTIZED=ON \
+ -DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
cmake --build ${CMAKE_OUTPUT_DIR} -j4
diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
index c7c00be2574..f0675f56cc7 100644
--- a/.ci/scripts/utils.sh
+++ b/.ci/scripts/utils.sh
@@ -99,7 +99,7 @@ build_executorch_runner_cmake() {
pushd "${CMAKE_OUTPUT_DIR}" || return
# This command uses buck2 to gather source files and buck2 could crash flakily
# on MacOS
- retry cmake -DBUCK2=buck2 -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE=Release ..
+ retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE=Release ..
popd || return
if [ "$(uname)" == "Darwin" ]; then
diff --git a/.clang-format b/.clang-format
index 31a13c408fc..8ec7b569e24 100644
--- a/.clang-format
+++ b/.clang-format
@@ -2,22 +2,55 @@
Language: Cpp
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
-AlignConsecutiveMacros: None
-AlignConsecutiveAssignments: None
-AlignConsecutiveBitFields: None
-AlignConsecutiveDeclarations: None
+AlignArrayOfStructures: None
+AlignConsecutiveAssignments:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: true
+AlignConsecutiveBitFields:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: true
+AlignConsecutiveDeclarations:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: true
+AlignConsecutiveMacros:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCompound: false
+ AlignFunctionPointers: false
+ PadOperators: true
+AlignConsecutiveShortCaseStatements:
+ Enabled: false
+ AcrossEmptyLines: false
+ AcrossComments: false
+ AlignCaseColons: false
AlignEscapedNewlines: Left
AlignOperands: DontAlign
-AlignTrailingComments: false
+AlignTrailingComments:
+ Kind: Never
+ OverEmptyLines: 0
AllowAllArgumentsOnNextLine: true
-AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
-AllowShortEnumsOnASingleLine: true
+AllowBreakBeforeNoexceptSpecifier: Never
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
+AllowShortCompoundRequirementOnASingleLine: true
+AllowShortEnumsOnASingleLine: true
AllowShortFunctionsOnASingleLine: Empty
-AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
@@ -27,17 +60,18 @@ AttributeMacros:
- __capability
BinPackArguments: false
BinPackParameters: false
+BitFieldColonSpacing: Both
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
+ AfterExternBlock: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
- AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
@@ -46,26 +80,27 @@ BraceWrapping:
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
+BreakAdjacentStringLiterals: true
+BreakAfterAttributes: Leave
+BreakAfterJavaFieldAnnotations: false
+BreakArrays: true
BreakBeforeBinaryOperators: None
-BreakBeforeConceptDeclarations: true
+BreakBeforeConceptDeclarations: Always
BreakBeforeBraces: Attach
-BreakBeforeInheritanceComma: false
-BreakInheritanceList: BeforeColon
+BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
-BreakAfterJavaFieldAnnotations: false
+BreakInheritanceList: BeforeColon
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
-DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
+EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
@@ -73,8 +108,8 @@ ForEachMacros:
- FOR_EACH
- FOR_EACH_R
- FOR_EACH_RANGE
-StatementAttributeLikeMacros:
- - Q_EMIT
+IfMacros:
+ - KJ_IF_MAYBE
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^<.*\.h(pp)?>'
@@ -91,18 +126,31 @@ IncludeCategories:
CaseSensitive: false
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
-IndentCaseLabels: true
+IndentAccessModifiers: false
IndentCaseBlocks: false
+IndentCaseLabels: true
+IndentExternBlock: AfterExternBlock
IndentGotoLabels: true
IndentPPDirectives: None
-IndentExternBlock: AfterExternBlock
-IndentRequires: false
+IndentRequiresClause: true
IndentWidth: 2
IndentWrappedFunctionNames: false
+InsertBraces: false
+InsertNewlineAtEOF: false
InsertTrailingCommas: None
+IntegerLiteralSeparator:
+ Binary: 0
+ BinaryMinDigits: 0
+ Decimal: 0
+ DecimalMinDigits: 0
+ Hex: 0
+ HexMinDigits: 0
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
+KeepEmptyLinesAtEOF: false
+LambdaBodyIndentation: Signature
+LineEnding: DeriveLF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
@@ -112,53 +160,85 @@ ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
+PackConstructorInitializers: NextLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakScopeResolution: 500
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
+PPIndentWidth: -1
+QualifierAlignment: Leave
+ReferenceAlignment: Pointer
ReflowComments: true
-SortIncludes: true
+RemoveBracesLLVM: false
+RemoveParentheses: Leave
+RemoveSemicolon: false
+RequiresClausePosition: OwnLine
+RequiresExpressionIndentation: OuterScope
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 1
+SkipMacroDefinitionBody: false
+SortIncludes: CaseSensitive
SortJavaStaticImport: Before
-SortUsingDeclarations: true
+SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
+SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
+SpaceBeforeJsonColon: false
SpaceBeforeParens: ControlStatements
-SpaceAroundPointerQualifiers: Default
+SpaceBeforeParensOptions:
+ AfterControlStatements: true
+ AfterForeachMacros: true
+ AfterFunctionDefinitionName: false
+ AfterFunctionDeclarationName: false
+ AfterIfMacros: true
+ AfterOverloadedOperator: false
+ AfterPlacementOperator: true
+ AfterRequiresInClause: false
+ AfterRequiresInExpression: false
+ BeforeNonEmptyParentheses: false
SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
-SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
-SpacesInAngles: false
-SpacesInConditionalStatement: false
+SpacesInAngles: Never
SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
+SpacesInLineCommentPrefix:
+ Minimum: 1
+ Maximum: -1
+SpacesInParens: Never
+SpacesInParensOptions:
+ InCStyleCasts: false
+ InConditionalStatements: false
+ InEmptyParentheses: false
+ Other: false
SpacesInSquareBrackets: false
-SpaceBeforeSquareBrackets: false
-BitFieldColonSpacing: Both
Standard: Latest
+StatementAttributeLikeMacros:
+ - Q_EMIT
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
-UseCRLF: false
UseTab: Never
+VerilogBreakBetweenInstancePorts: true
WhitespaceSensitiveMacros:
- - STRINGIZE
- - PP_STRINGIZE
- BOOST_PP_STRINGIZE
- - NS_SWIFT_NAME
- CF_SWIFT_NAME
+ - NS_SWIFT_NAME
+ - PP_STRINGIZE
+ - STRINGIZE
...
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000000..1a7fef172cd
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+# From https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+version: 2
+updates:
+ - package-ecosystem: "pip"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ reviewers:
+ - "pytorch/team-executorch"
+ allow:
+ - dependency-name: "torchfix"
diff --git a/.github/workflows/_unittest.yml b/.github/workflows/_unittest.yml
index c36c5861168..219812fdd26 100644
--- a/.github/workflows/_unittest.yml
+++ b/.github/workflows/_unittest.yml
@@ -57,9 +57,6 @@ jobs:
script: |
set -eux
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
-
BUILD_TOOL=${{ matrix.build-tool }}
bash .ci/scripts/setup-conda.sh
@@ -75,5 +72,3 @@ jobs:
${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
# Run gtest
${CONDA_RUN} buck2 test runtime/core/... runtime/platform/...
-
- popd
diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
index 0d8931cf102..fa5cd854cc0 100644
--- a/.github/workflows/android.yml
+++ b/.github/workflows/android.yml
@@ -10,7 +10,8 @@ on:
- .ci/docker/**
- .github/workflows/android.yml
- install_requirements.sh
- - examples/demo-apps/**
+ - examples/demo-apps/android/**
+ - extension/android/**
- extension/module/**
workflow_dispatch:
@@ -33,6 +34,7 @@ jobs:
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
+ upload-artifact: android-apps
script: |
set -eux
@@ -45,3 +47,62 @@ jobs:
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Build Android demo app
bash build/test_android_ci.sh
+
+ mkdir -p artifacts-to-be-uploaded
+ # Copy the app and its test suite to S3
+ cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/
+ cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk artifacts-to-be-uploaded/
+ # Also copy the share libraries
+ cp cmake-out-android/lib/*.a artifacts-to-be-uploaded/
+
+ # Upload the app and its test suite to S3 so that they can be downloaded by the test job
+ upload-artifacts:
+ needs: test-demo-android
+ runs-on: linux.2xlarge
+ steps:
+ - name: Download the artifacts
+ uses: actions/download-artifact@v3
+ with:
+ # The name here needs to match the name of the upload-artifact parameter
+ name: android-apps
+ path: ${{ runner.temp }}/artifacts/
+
+ - name: Verify the artifacts
+ shell: bash
+ working-directory: ${{ runner.temp }}/artifacts/
+ run: |
+ ls -lah ./
+
+ - name: Upload the artifacts to S3
+ uses: seemethere/upload-artifact-s3@v5
+ with:
+ s3-bucket: gha-artifacts
+ s3-prefix: |
+ ${{ github.repository }}/${{ github.run_id }}/artifact
+ retention-days: 14
+ if-no-files-found: ignore
+ path: ${{ runner.temp }}/artifacts/
+
+ # Let's see how expensive this job is, we might want to tone it down by running it periodically
+ test-llama-app:
+ needs: upload-artifacts
+ permissions:
+ id-token: write
+ contents: read
+ uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
+ with:
+ device-type: android
+ runner: ubuntu-latest
+ test-infra-ref: ''
+ # This is the ARN of ExecuTorch project on AWS
+ project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
+ # This is the custom Android device pool that only includes Samsung Galaxy S2x
+ device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
+ # Uploaded to S3 from the previous job, the name of the app comes from the project itself
+ android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug.apk
+ android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/app-debug-androidTest.apk
+ # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
+ test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
+ # The exported llama2 model and its tokenizer, can be downloaded from https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b.zip.
+ # Among the input, this is the biggest file and uploading it to AWS beforehand makes the test run much faster
+ extra-data: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/bd15825b-ddab-4e47-9fef-a9c8935778dd
diff --git a/.github/workflows/apple.yml b/.github/workflows/apple.yml
index 06aa6a66e98..54d019b6764 100644
--- a/.github/workflows/apple.yml
+++ b/.github/workflows/apple.yml
@@ -34,8 +34,6 @@ jobs:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
BUILD_TOOL=cmake
.ci/scripts/setup-conda.sh
@@ -48,8 +46,6 @@ jobs:
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
build/test_ios_ci.sh
- popd
-
build-frameworks-ios:
name: build-frameworks-ios
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -61,10 +57,8 @@ jobs:
upload-artifact: executorch-frameworks-ios
timeout: 90
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
BUILD_TOOL=cmake
- VERSION="0.1.0"
+ VERSION="latest"
FRAMEWORKS=(
"executorch"
"coreml_backend"
@@ -111,8 +105,6 @@ jobs:
zip -r "${RUNNER_TEMP}/artifacts/${FRAMEWORK}_debug-${VERSION}.zip" "${FRAMEWORK}_debug.xcframework"
) done
- popd
-
upload-frameworks-ios:
runs-on: ubuntu-22.04
needs: build-frameworks-ios
diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml
index ee5cfb859b3..ccc852c24fb 100644
--- a/.github/workflows/doc-build.yml
+++ b/.github/workflows/doc-build.yml
@@ -8,6 +8,7 @@ on:
- release/*
tags:
- v[0-9]+.[0-9]+.[0-9]+
+ - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
workflow_dispatch:
schedule:
- cron: '0 0 * * *'
@@ -46,13 +47,9 @@ jobs:
# ET_VERSION_DOCS will be pulled during the doc build to add to the version dropdown
# on the website. See docs/source/conf.py for details
- REF_TYPE=${{ github.ref_type }}
- REF_NAME=${{ github.ref_name }}
-
- echo "$REF_TYPE"
- echo "$REF_NAME"
-
- ET_VERSION_DOCS="${REF_NAME}"
+ GITHUB_REF=${{ github.ref }}
+ echo "$GITHUB_REF"
+ export ET_VERSION_DOCS="${GITHUB_REF}"
echo "$ET_VERSION_DOCS"
set -eux
@@ -68,23 +65,21 @@ jobs:
make html
cd ..
+ # If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
+ echo "GitHub Ref: ${GITHUB_REF}"
+ if [[ "${{ github.ref }}" == 'refs/heads/main' ]]; then
+        find docs/_build/html/ -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">';
+ fi
+
cp -rf docs/_build/html/* "${RUNNER_DOCS_DIR}"
mv docs/_build/html "${RUNNER_ARTIFACT_DIR}"
ls -R "${RUNNER_ARTIFACT_DIR}"/*/*.html
-# Enable preview later. Previews are available publicly
-#
-# upload-preview:
-# if: github.repository == 'pytorch/executorch' && github.event_name == 'push' &&
-# (github.ref_type == 'branch' && github.ref_name == 'main')
-# uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-
upload-gh-pages:
needs: build
- if: github.repository == 'pytorch/executorch' && github.event_name == 'push' &&
- ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
+ if: github.repository == 'pytorch/executorch' && github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
permissions:
contents: write
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -96,22 +91,17 @@ jobs:
script: |
set -euo pipefail
- REF_TYPE=${{ github.ref_type }}
- REF_NAME=${{ github.ref_name }}
-
- # If building for a release tag, branch, set the branch/tag name
- # as the target folder in the gh-pages branch. The artifacts created
- # during the build will be copied over to the target dir in the
- # gh-pages branch.
- if [[ "${REF_TYPE}" == branch ]]; then
- TARGET_FOLDER="${REF_NAME}"
- elif [[ "${REF_TYPE}" == tag ]]; then
- # Strip the leading "v" as well as the trailing patch version and "-rc" suffix.
- # For example: 'v0.1.2' -> '0.1' and 'v0.1.2-rc1' -> 0.1.
- TARGET_FOLDER=$(echo "${REF_NAME}" | sed 's/^v//i; s/-rc[0-9]*$//; s/\.[0-9]*$//')
+ # Get github.ref for the output doc folder. By default "main"
+ # If matches a tag like refs/tags/v1.12.0-rc3 or
+ # refs/tags/v1.12.0 convert to 1.12
+ GITHUB_REF=${{ github.ref }}
+
+ # Convert refs/tags/v1.12.0rc3 into 1.12.
+ # Adopted from https://github.com/pytorch/pytorch/blob/main/.github/workflows/_docs.yml#L150C11-L155C13
+ if [[ "${GITHUB_REF}" =~ ^refs/tags/v([0-9]+\\.[0-9]+)\\. ]]; then
+ TARGET_FOLDER="${BASH_REMATCH[1]}"
else
- echo "ERROR: Invalid REF_TYPE: ${REF_TYPE}. Expected 'branch' or 'tag'."
- exit 1
+ TARGET_FOLDER="main"
fi
echo "Target Folder: ${TARGET_FOLDER}"
@@ -122,12 +112,6 @@ jobs:
mv "${RUNNER_ARTIFACT_DIR}"/html/* "${TARGET_FOLDER}"
git add "${TARGET_FOLDER}" || true
- # If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
- if [[ "${REF_NAME}" == 'main' ]]; then
-      find "${TARGET_FOLDER}" -type f -name "*.html" -exec sed -i '/<head>/a \ \ <meta name="robots" content="noindex">' {} \;
- git add "${TARGET_FOLDER}"/**/*.html || true
- fi
-
git config user.name 'pytorchbot'
git config user.email 'soumith+bot@pytorch.org'
git commit -m "Auto-generating sphinx docs" || true
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 9751b906cd8..f650fc79209 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -90,7 +90,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
- mode: [portable, xnnpack]
+ mode: [portable, xnnpack+custom, xnnpack+custom+qe]
fail-fast: false
with:
runner: linux.2xlarge
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 16ed6a27577..b10c6227d39 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -46,9 +46,6 @@ jobs:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: ${{ matrix.timeout }}
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
-
MODEL_NAME=${{ matrix.model }}
BUILD_TOOL=${{ matrix.build-tool }}
BACKEND=${{ matrix.backend }}
@@ -59,7 +56,6 @@ jobs:
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test xecutorch
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
- popd
test-custom-ops-macos:
name: test-custom-ops-macos
@@ -75,9 +71,6 @@ jobs:
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
-
BUILD_TOOL=${{ matrix.build-tool }}
bash .ci/scripts/setup-conda.sh
@@ -85,7 +78,6 @@ jobs:
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test custom ops
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
- popd
test-selective-build-macos:
name: test-selective-build-macos
@@ -101,9 +93,6 @@ jobs:
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
-
BUILD_TOOL=${{ matrix.build-tool }}
bash .ci/scripts/setup-conda.sh
@@ -111,7 +100,6 @@ jobs:
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test selective build
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
- popd
test-demo-backend-delegation:
name: test-demo-backend-delegation
@@ -208,9 +196,6 @@ jobs:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
-
BUILD_TOOL=cmake
bash .ci/scripts/setup-conda.sh
@@ -218,7 +203,6 @@ jobs:
GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test coreml delegate
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
- popd
test-pybind-build-macos:
name: test-pybind-build-macos
@@ -235,8 +219,6 @@ jobs:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 180
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
bash .ci/scripts/setup-conda.sh
# build module for executorch.extension.pybindings.portable_lib
@@ -245,7 +227,6 @@ jobs:
# see if we can import the module successfully
${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
- popd
test-llama-runner-macos:
name: test-llama-runner-mac
@@ -254,7 +235,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
- mode: [portable, xnnpack]
+ mode: [portable, xnnpack+kv+custom]
fail-fast: false
with:
runner: macos-m1-stable
@@ -263,8 +244,6 @@ jobs:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
- WORKSPACE=$(pwd)
- pushd "${WORKSPACE}/pytorch/executorch"
bash .ci/scripts/setup-conda.sh
DTYPE=${{ matrix.dtype }}
@@ -278,4 +257,3 @@ jobs:
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
- popd
diff --git a/.gitignore b/.gitignore
index 6661daed13e..26a46f23f62 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
buck-out/
cmake-out/
cmake-android-out/
+cmake-out-android/
cmake-ios-out/
ethos-u-scratch/
executorch.egg-info
diff --git a/.gitmodules b/.gitmodules
index 44137b27a71..42deca0a6bb 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -62,3 +62,9 @@
[submodule "examples/third-party/LLaVA"]
path = examples/third-party/LLaVA
url = https://github.com/haotian-liu/LLaVA.git
+[submodule "examples/models/llama2/third-party/re2"]
+ path = examples/models/llama2/third-party/re2
+ url = https://github.com/google/re2.git
+[submodule "examples/models/llama2/third-party/abseil-cpp"]
+ path = examples/models/llama2/third-party/abseil-cpp
+ url = https://github.com/abseil/abseil-cpp.git
diff --git a/.swift/custom_backend_debug/dummy.swift b/.swift/custom_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/executorch/dummy.swift b/.swift/executorch/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/executorch_debug/dummy.swift b/.swift/executorch_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/mps_backend/dummy.swift b/.swift/mps_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/mps_backend_debug/dummy.swift b/.swift/mps_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/optimized_backend/dummy.swift b/.swift/optimized_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/optimized_backend_debug/dummy.swift b/.swift/optimized_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/portable_backend/dummy.swift b/.swift/portable_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/portable_backend_debug/dummy.swift b/.swift/portable_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/quantized_backend/dummy.swift b/.swift/quantized_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/quantized_backend_debug/dummy.swift b/.swift/quantized_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/xnnpack_backend/dummy.swift b/.swift/xnnpack_backend/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/.swift/xnnpack_backend_debug/dummy.swift b/.swift/xnnpack_backend_debug/dummy.swift
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 46b73f63492..0610462aed9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -144,6 +144,8 @@ option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
option(EXECUTORCH_BUILD_CUSTOM "Build the custom kernels" OFF)
+option(EXECUTORCH_BUILD_CUSTOM_OPS_AOT "Build the custom ops lib for AOT" OFF)
+
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
OFF)
@@ -175,8 +177,9 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
-cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
- ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
+cmake_dependent_option(
+ EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
+ "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
@@ -184,8 +187,19 @@ cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
+if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
+ set(EXECUTORCH_BUILD_CUSTOM ON)
+endif()
+
+if(EXECUTORCH_BUILD_CUSTOM)
+ set(EXECUTORCH_BUILD_OPTIMIZED ON)
+endif()
+
if(EXECUTORCH_BUILD_CPUINFO)
# --- cpuinfo
+ set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+ ${CMAKE_POSITION_INDEPENDENT_CODE})
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
set(CPUINFO_BUILD_TOOLS
OFF
@@ -207,10 +221,15 @@ if(EXECUTORCH_BUILD_CPUINFO)
CACHE STRING "")
set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
add_subdirectory("${CPUINFO_SOURCE_DIR}")
+ set(CMAKE_POSITION_INDEPENDENT_CODE
+ ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()
if(EXECUTORCH_BUILD_PTHREADPOOL)
# --- pthreadpool
+ set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+ ${CMAKE_POSITION_INDEPENDENT_CODE})
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
set(PTHREADPOOL_BUILD_TESTS
OFF
@@ -230,6 +249,8 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
CACHE STRING "")
endif()
add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
+ set(CMAKE_POSITION_INDEPENDENT_CODE
+ ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()
if(NOT PYTHON_EXECUTABLE)
@@ -352,23 +373,27 @@ add_subdirectory(schema)
# Only contains primitive operators; does not contain portable kernels or other
# full operators. Does not contain any backends.
#
-
-add_library(executorch ${_executorch__srcs})
-target_link_libraries(executorch PRIVATE program_schema)
-target_link_options_shared_lib(executorch)
+add_library(executorch_no_prim_ops ${_executorch_no_prim_ops__srcs})
+target_link_libraries(executorch_no_prim_ops PRIVATE program_schema)
# Check if dl exists for this toolchain and only then link it.
find_library(DL_LIBRARY_EXISTS NAMES dl)
# Check if the library was found
if(DL_LIBRARY_EXISTS)
- target_link_libraries(executorch PRIVATE dl) # For dladdr()
+ target_link_libraries(executorch_no_prim_ops PRIVATE dl) # For dladdr()
endif()
-target_include_directories(executorch PUBLIC ${_common_include_directories})
-target_compile_options(executorch PUBLIC ${_common_compile_options})
+target_include_directories(executorch_no_prim_ops PUBLIC ${_common_include_directories})
+target_compile_options(executorch_no_prim_ops PUBLIC ${_common_compile_options})
if(MAX_KERNEL_NUM)
- target_compile_definitions(executorch
+ target_compile_definitions(executorch_no_prim_ops
PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM})
endif()
+add_library(executorch ${_executorch__srcs})
+target_link_libraries(executorch PRIVATE executorch_no_prim_ops)
+target_include_directories(executorch PUBLIC ${_common_include_directories})
+target_compile_options(executorch PUBLIC ${_common_compile_options})
+target_link_options_shared_lib(executorch)
+
#
# portable_ops_lib: A library to register core ATen ops using portable kernels,
# see kernels/portable/CMakeLists.txt.
@@ -406,7 +431,7 @@ endif()
# Install `executorch` library as well as `executorch-config.cmake` under
# ${CMAKE_INSTALL_PREFIX}/
install(
- TARGETS executorch
+ TARGETS executorch executorch_no_prim_ops
DESTINATION lib
INCLUDES
DESTINATION ${_common_include_directories})
@@ -504,25 +529,51 @@ if(EXECUTORCH_BUILD_PYBIND)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
endif()
+ # find pytorch lib, to allow pybind to take at::Tensor as input/output
+ find_package(Torch CONFIG REQUIRED)
+ find_library(TORCH_PYTHON_LIBRARY torch_python
+ PATHS "${TORCH_INSTALL_PREFIX}/lib")
+
+ set(_dep_libs
+ ${TORCH_PYTHON_LIBRARY}
+ bundled_program
+ etdump
+ executorch
+ extension_data_loader
+ portable_ops_lib
+ util
+ torch)
+
if(EXECUTORCH_BUILD_COREML)
- set(PYBIND_LINK_COREML "coremldelegate")
+ list(APPEND _dep_libs coremldelegate)
endif()
if(EXECUTORCH_BUILD_MPS)
- set(PYBIND_LINK_MPS "mpsdelegate")
+ list(APPEND _dep_libs mpsdelegate)
endif()
if(EXECUTORCH_BUILD_XNNPACK)
- # need to explicitly specify XNNPACK here
- # otherwise uses XNNPACK symbols from libtorch_cpu
- set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
+ # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
+ # from libtorch_cpu
+ list(APPEND _dep_libs xnnpack_backend XNNPACK)
endif()
- # find pytorch lib, to allow pybind to take at::Tensor as input/output
- find_package(Torch CONFIG REQUIRED)
- find_library(TORCH_PYTHON_LIBRARY torch_python
- PATHS "${TORCH_INSTALL_PREFIX}/lib")
+ if(EXECUTORCH_BUILD_QUANTIZED)
+ target_link_options_shared_lib(quantized_ops_lib)
+ list(APPEND _dep_libs quantized_kernels quantized_ops_lib)
+ endif()
+ # TODO(larryliu): Fix macOS 2 dylibs having 2 sets of static variables issue
+ if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT AND NOT APPLE)
+ list(APPEND _dep_libs custom_ops_aot_lib)
+ endif()
+  # TODO(larryliu): Fix linux duplicate registration problem. In GH CI worker
+ # libcustom_ops.a doesn't dedup with the one indirectly linked from
+ # libcustom_ops_aot_lib.a
+ if(EXECUTORCH_BUILD_CUSTOM AND APPLE)
+ target_link_options_shared_lib(custom_ops)
+ list(APPEND _dep_libs custom_ops)
+ endif()
# compile options for pybind
set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
@@ -540,23 +591,14 @@ if(EXECUTORCH_BUILD_PYBIND)
# pybind portable_lib
pybind11_add_module(portable_lib extension/pybindings/pybindings.cpp)
+ # The actual output file needs a leading underscore so it can coexist with
+ # portable_lib.py in the same python package.
+ set_target_properties(portable_lib PROPERTIES OUTPUT_NAME "_portable_lib")
target_compile_definitions(portable_lib
- PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
+ PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=_portable_lib)
target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
- target_link_libraries(
- portable_lib
- PUBLIC ${TORCH_PYTHON_LIBRARY}
- bundled_program
- etdump
- executorch
- extension_data_loader
- portable_ops_lib
- util
- torch
- ${PYBIND_LINK_COREML}
- ${PYBIND_LINK_MPS}
- ${PYBIND_LINK_XNNPACK})
+ target_link_libraries(portable_lib PUBLIC ${_dep_libs})
install(TARGETS portable_lib
LIBRARY DESTINATION executorch/extension/pybindings)
diff --git a/Package.swift b/Package.swift
deleted file mode 100644
index b0dfec174f2..00000000000
--- a/Package.swift
+++ /dev/null
@@ -1,101 +0,0 @@
-// swift-tools-version:5.9
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-import PackageDescription
-
-let version = "0.1.0"
-let url = "https://ossci-ios.s3.amazonaws.com/executorch/"
-let debug = "_debug"
-let deliverables = [
- "coreml_backend": [
- "sha256": "5bfa35cb5143b4af6840e0e5dd2d40bce93dff331b8eb5798a46274239391a5d",
- "sha256" + debug: "1422019da9000f8ff7be597de9e0e3b2482f99cdaa75c2d179835778647be1a6",
- "frameworks": [
- "Accelerate",
- "CoreML",
- ],
- "libraries": [
- "sqlite3",
- ],
- ],
- "custom_backend": [
- "sha256": "2201a61eaf7e06e1937cb73a469fb36cabc219496ba004b85feb2cc7c10f300d",
- "sha256" + debug: "3eb6eb97bf0641d2305b0f50ff05a8862d7d65e2491cf4aa05ef1d108649f07c",
- ],
- "executorch": [
- "sha256": "2b55cbcff845ab9eaf16a21e520546b2975ef8c55b9e3fbbcc0c375334e40c6f",
- "sha256" + debug: "12933cedff6cf21c9d21668779f8d8af8049646fe7d290787b12227ff7abe4a7",
- ],
- "mps_backend": [
- "sha256": "510d708361b6ea0692ce5aeb638725d6275824b37bbe744aa876fda24cc2bbbf",
- "sha256" + debug: "6a67ba0bf8033f17bd66acb222446df51cd1304e24a4fb2c6d97e15a30fb24f0",
- "frameworks": [
- "Metal",
- "MetalPerformanceShaders",
- "MetalPerformanceShadersGraph",
- ],
- ],
- "optimized_backend": [
- "sha256": "50aaa54901a7cee1059e71cc623f054610406d65ba8fd6edb10b45861be67237",
- "sha256" + debug: "3f43f465727c8705432f4bb69260cc9501c519e5da006fc19ee2ab2ea260d1f0",
- ],
- "portable_backend": [
- "sha256": "964238e92828665aa598c05b2264faab91fb13ce0f42633cc7d5653300af3e9b",
- "sha256" + debug: "d6d85304a4b40f13c9b893e8c264ebdb15307cacf8997494b3818a52e4914b28",
- ],
- "quantized_backend": [
- "sha256": "37d31a319f92e26bab2b7ec5e783a8b14457dee0a4638dcdca1d9e17539ee3fb",
- "sha256" + debug: "6b45f66f60f6106a41e191418c970bf7b0605df73b9815a06441a5f0809b54e6",
- ],
- "xnnpack_backend": [
- "sha256": "03d506243c392e872519ae1335a025ef202319c1db339a753f9d7d74cba226f0",
- "sha256" + debug: "3341e89abc99552a6a5bad360003baed194a83e865338bc07afe9e4f171ea169",
- ],
-].reduce(into: [String: [String: Any]]()) {
- $0[$1.key] = $1.value
- $0[$1.key + debug] = $1.value
-}
-.reduce(into: [String: [String: Any]]()) {
- var newValue = $1.value
- if $1.key.hasSuffix(debug) {
- $1.value.forEach { key, value in
- if key.hasSuffix(debug) {
- newValue[String(key.dropLast(debug.count))] = value
- }
- }
- }
- $0[$1.key] = newValue.filter { key, _ in !key.hasSuffix(debug) }
-}
-
-let package = Package(
- name: "executorch",
- platforms: [
- .iOS(.v15),
- ],
- products: deliverables.keys.map { key in
- .library(name: key, targets: ["\(key)_dependencies"])
- }.sorted { $0.name < $1.name },
- targets: deliverables.flatMap { key, value -> [Target] in
- [
- .binaryTarget(
- name: key,
- url: "\(url)\(key)-\(version).zip",
- checksum: value["sha256"] as? String ?? ""
- ),
- .target(
- name: "\(key)_dependencies",
- dependencies: [.target(name: key)],
- path: ".swift/\(key)",
- linkerSettings:
- (value["frameworks"] as? [String] ?? []).map { .linkedFramework($0) } +
- (value["libraries"] as? [String] ?? []).map { .linkedLibrary($0) }
- ),
- ]
- }
-)
diff --git a/README.md b/README.md
index 4f6d3da7240..e63a2d20caa 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Key value propositions of ExecuTorch are:
capabilities such as CPUs, NPUs, and DSPs.
For a comprehensive technical overview of ExecuTorch and step-by-step tutorials,
-please visit our [documentation website](https://pytorch.org/executorch).
+please visit our documentation website [for the latest release](https://pytorch.org/executorch/stable/index.html) (or the [main branch](https://pytorch.org/executorch/main/index.html)).
## Important: This is a preview release
diff --git a/backends/apple/coreml/.clang-format b/backends/apple/coreml/.clang-format
index 0c8764539cf..0b04022b0f6 100644
--- a/backends/apple/coreml/.clang-format
+++ b/backends/apple/coreml/.clang-format
@@ -1,5 +1,4 @@
BasedOnStyle: WebKit
-BreakBeforeBraces: Attach
AllowShortIfStatementsOnASingleLine: false
BreakBeforeBinaryOperators: None
BreakConstructorInitializers: BeforeColon
diff --git a/backends/apple/coreml/CMakeLists.txt b/backends/apple/coreml/CMakeLists.txt
index f1c19d00ee8..b3d0182999a 100644
--- a/backends/apple/coreml/CMakeLists.txt
+++ b/backends/apple/coreml/CMakeLists.txt
@@ -13,6 +13,8 @@ if(NOT EXECUTORCH_ROOT)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
endif()
+option(COREML_BUILD_EXECUTOR_RUNNER "Build CoreML executor runner." OFF)
+
# inmemoryfs sources
set(INMEMORYFS_SOURCES
runtime/inmemoryfs/inmemory_filesystem.cpp
@@ -144,7 +146,7 @@ target_include_directories(
)
target_link_libraries(
coremldelegate PRIVATE
- executorch
+ executorch_no_prim_ops
)
if(EXECUTORCH_BUILD_SDK)
@@ -174,18 +176,26 @@ find_library(SQLITE_LIBRARY sqlite3)
target_link_libraries(coremldelegate
PRIVATE
- executorch
+ executorch_no_prim_ops
${ACCELERATE_FRAMEWORK}
${COREML_FRAMEWORK}
${FOUNDATION_FRAMEWORK}
${SQLITE_LIBRARY}
)
+if(COREML_BUILD_EXECUTOR_RUNNER)
+target_link_libraries(coremldelegate
+ PRIVATE
+ portable_ops_lib
+ portable_kernels
+)
+endif()
+
target_compile_options(coremldelegate PRIVATE "-fobjc-arc")
target_compile_options(coremldelegate PRIVATE "-fno-exceptions")
if(EXECUTORCH_BUILD_SDK)
-target_compile_options(executorch PUBLIC -DET_EVENT_TRACER_ENABLED)
+target_compile_options(executorch_no_prim_ops PUBLIC -DET_EVENT_TRACER_ENABLED)
target_compile_options(coremldelegate PRIVATE "-frtti")
target_compile_options(libprotobuf-lite PRIVATE "-frtti")
else()
diff --git a/backends/apple/coreml/README.md b/backends/apple/coreml/README.md
index 1710860f87e..4a21d8d8ae1 100644
--- a/backends/apple/coreml/README.md
+++ b/backends/apple/coreml/README.md
@@ -6,54 +6,123 @@ Core ML is an optimized framework for running machine learning models on Apple d
## Layout
- `compiler/` : Lowers a module to Core ML backend.
+- `partition/`: Partitions a module fully or partially to Core ML backend.
+- `quantizer/`: Quantizes a module in Core ML favored scheme.
- `scripts/` : Scripts for installing dependencies and running tests.
- `runtime/`: Core ML delegate runtime implementation.
- `inmemoryfs`: InMemory filesystem implementation used to serialize/de-serialize AOT blob.
- `kvstore`: Persistent Key-Value store implementation.
- `delegate`: Runtime implementation.
- `include` : Public headers.
- - `tests` : Tests for Core ML delegate.
- - `workspace` : Xcode workspace for tests.
+ - `sdk` : SDK implementation.
+ - `tests` : Unit tests.
+ - `workspace` : Xcode workspace for the runtime.
- `third-party/`: External dependencies.
-## Help & Improvements
-If you have problems or questions or have suggestions for ways to make
-implementation and testing better, please create an issue on [github](https://www.github.com/pytorch/executorch/issues).
+## Partition and Delegation
-## Delegation
-
-For delegating the Program to the **Core ML** backend, the client must be responsible for calling `to_backend` with the **CoreMLBackend** tag.
+To delegate a Program to the **Core ML** backend, the client must call `to_backend` with the **CoreMLPartitioner**.
```python
-import executorch.exir as exir
import torch
-
-from executorch.exir.backend.backend_api import to_backend
+import executorch.exir
from executorch.backends.apple.coreml.compiler import CoreMLBackend
+from executorch.backends.apple.coreml.partition.coreml_partitioner import CoreMLPartitioner
-class LowerableSubModel(torch.nn.Module):
+class Model(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, x):
return torch.sin(x)
-# Convert the lowerable module to Edge IR Representation
-to_be_lowered = LowerableSubModel()
-example_input = (torch.ones(1), )
-to_be_lowered_exir_submodule = exir.capture(to_be_lowered, example_input).to_edge()
+source_model = Model()
+example_inputs = (torch.ones(1), )
-# Lower to Core ML backend
-lowered_module = to_backend('CoreMLBackend', to_be_lowered_exir_submodule.exported_program, [])
+# Export the source model to Edge IR representation
+aten_program = torch.export.export(source_model, example_inputs)
+edge_program_manager = executorch.exir.to_edge(aten_program)
+
+# Delegate to Core ML backend
+delegated_program_manager = edge_program_manager.to_backend(CoreMLPartitioner())
+
+# Serialize delegated program
+executorch_program = delegated_program_manager.to_executorch()
+with open("model.pte", "wb") as f:
+ f.write(executorch_program.buffer)
```
-Currently, the **Core ML** backend delegates the whole module to **Core ML**. If a specific op is not supported by the **Core ML** backend then the `to_backend` call would throw an exception. We will be adding a **Core ML Partitioner** to resolve the issue.
+The module will be fully or partially delegated to **Core ML**, depending on whether all or part of the ops are supported by the **Core ML** backend. Users may force certain ops to be skipped via `CoreMLPartitioner(skip_ops_for_coreml_delegation=...)`
+
+The `to_backend` implementation is a thin wrapper over [coremltools](https://apple.github.io/coremltools/docs-guides/), `coremltools` is responsible for converting an **ExportedProgram** to a **MLModel**. The converted **MLModel** data is saved, flattened, and returned as bytes to **ExecuTorch**.
-The `to_backend` implementation is a thin wrapper over `coremltools`, `coremltools` is responsible for converting an **ExportedProgram** to a **MLModel**. The converted **MLModel** data is saved, flattened, and returned as bytes to **ExecuTorch**.
+## Quantization
+
+To quantize a Program in a Core ML favored way, the client may utilize **CoreMLQuantizer**.
+
+```python
+import torch
+import executorch.exir
+
+from torch._export import capture_pre_autograd_graph
+from torch.ao.quantization.quantize_pt2e import (
+ convert_pt2e,
+ prepare_pt2e,
+ prepare_qat_pt2e,
+)
+
+from executorch.backends.apple.coreml.quantizer.coreml_quantizer import CoreMLQuantizer
+from coremltools.optimize.torch.quantization.quantization_config import (
+ LinearQuantizerConfig,
+ QuantizationScheme,
+)
+
+class Model(torch.nn.Module):
+ def __init__(self) -> None:
+ super().__init__()
+ self.conv = torch.nn.Conv2d(
+ in_channels=3, out_channels=16, kernel_size=3, padding=1
+ )
+ self.relu = torch.nn.ReLU()
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ a = self.conv(x)
+ return self.relu(a)
+
+source_model = Model()
+example_inputs = (torch.randn((1, 3, 256, 256)), )
+
+pre_autograd_aten_dialect = capture_pre_autograd_graph(source_model, example_inputs)
+
+quantization_config = LinearQuantizerConfig.from_dict(
+ {
+ "global_config": {
+ "quantization_scheme": QuantizationScheme.symmetric,
+ "activation_dtype": torch.uint8,
+ "weight_dtype": torch.int8,
+ "weight_per_channel": True,
+ }
+ }
+)
+quantizer = CoreMLQuantizer(quantization_config)
+
+# For post-training quantization, use `prepare_pt2e`
+# For quantization-aware training, use `prepare_qat_pt2e`
+prepared_graph = prepare_pt2e(pre_autograd_aten_dialect, quantizer)
+
+prepared_graph(*example_inputs)
+converted_graph = convert_pt2e(prepared_graph)
+```
+
+The `converted_graph` is the quantized torch model, and can be delegated to **Core ML** similarly through **CoreMLPartitioner**
## Runtime
-To execute a **Core ML** delegated **Program**, the client must link to the `coremldelegate` library. Once linked there are no additional steps required, **ExecuTorch** when running the **Program** would call the **Core ML** runtime to execute the **Core ML** delegated part of the **Program**.
+To execute a Core ML delegated program, the application must link to the `coremldelegate` library. Once linked there are no additional steps required, ExecuTorch when running the program would call the Core ML runtime to execute the Core ML delegated part of the program.
Please follow the instructions described in the [Core ML setup](/backends/apple/coreml/setup.md) to link the `coremldelegate` library.
+
+## Help & Improvements
+If you have problems or questions or have suggestions for ways to make
+implementation and testing better, please create an issue on [github](https://www.github.com/pytorch/executorch/issues).
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
index da399e80d54..6fe37925d27 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLAssetManager.mm
@@ -630,7 +630,7 @@ - (NSUInteger)_compact:(NSUInteger)sizeInBytes error:(NSError * __autoreleasing
}
if (_estimatedSizeInBytes <= sizeInBytes) {
- return YES;
+ return _estimatedSizeInBytes;
}
std::error_code ec;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
index eab239b496c..13b1023bcbc 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.h
@@ -13,7 +13,8 @@
NS_ASSUME_NONNULL_BEGIN
/// The default model executor, the executor ignores logging options.
-__attribute__((objc_subclassing_restricted)) @interface ETCoreMLDefaultModelExecutor : NSObject
+__attribute__((objc_subclassing_restricted))
+@interface ETCoreMLDefaultModelExecutor : NSObject
+ (instancetype)new NS_UNAVAILABLE;
@@ -27,6 +28,9 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLDefaultModelExec
/// The model.
@property (readonly, strong, nonatomic) ETCoreMLModel* model;
+/// If set to `YES` then output backings are ignored.
+@property (readwrite, atomic) BOOL ignoreOutputBackings;
+
@end
NS_ASSUME_NONNULL_END
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
index 399c91bd495..57316e28015 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLDefaultModelExecutor.mm
@@ -26,6 +26,9 @@ - (instancetype)initWithModel:(ETCoreMLModel *)model {
loggingOptions:(const executorchcoreml::ModelLoggingOptions& __unused)loggingOptions
eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable __unused)eventLogger
error:(NSError * __autoreleasing *)error {
+ if (self.ignoreOutputBackings) {
+ predictionOptions.outputBackings = @{};
+ }
id outputs = [self.model.mlModel predictionFromFeatures:inputs
options:predictionOptions
error:error];
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h b/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
index 1a1b10848bb..d9c4d4ef638 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLLogging.h
@@ -7,6 +7,7 @@
#import
+#import
#import
NS_ASSUME_NONNULL_BEGIN
@@ -48,7 +49,11 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
/// Record the error with `os_log_error` and fills `*errorOut` with `NSError`.
#define ETCoreMLLogErrorAndSetNSError(errorOut, errorCode, formatString, ...) \
- os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
+ if (ET_LOG_ENABLED) { \
+ ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+ } else { \
+ os_log_error(ETCoreMLErrorUtils.loggingChannel, formatString, ##__VA_ARGS__); \
+ } \
if (errorOut) { \
*errorOut = \
[NSError errorWithDomain:ETCoreMLErrorDomain \
@@ -58,24 +63,31 @@ typedef NS_ERROR_ENUM(ETCoreMLErrorDomain, ETCoreMLError) {
}]; \
}
-/// Record the error and its underlying error with `os_log_error` and fills
-/// `*errorOut` with NSError.
+/// Record the error and its underlying error with `os_log_error` and fills `*errorOut` with `NSError`.
#define ETCoreMLLogUnderlyingErrorAndSetNSError(errorOut, errorCode, underlyingNSError, formatString, ...) \
- os_log_error(ETCoreMLErrorUtils.loggingChannel, \
- formatString ", with underlying error= %@.", \
- ##__VA_ARGS__, \
- (underlyingNSError).localizedDescription); \
+ if (ET_LOG_ENABLED) { \
+ ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+ } else { \
+ os_log_error(ETCoreMLErrorUtils.loggingChannel, \
+ formatString ", with underlying error= %@.", \
+ ##__VA_ARGS__, \
+ (underlyingNSError).localizedDescription); \
+ } \
if (errorOut) { \
*errorOut = [ETCoreMLErrorUtils errorWithCode:errorCode \
underlyingError:underlyingNSError \
format:@formatString, ##__VA_ARGS__]; \
}
-#define ETCoreMLLogError(error, formatString, ...) \
- os_log_error(ETCoreMLErrorUtils.loggingChannel, \
- formatString ", with error= %@.", \
- ##__VA_ARGS__, \
- (error).localizedDescription);
+#define ETCoreMLLogError(error, formatString, ...) \
+ if (ET_LOG_ENABLED) { \
+ ET_LOG(Error, "%s", [NSString stringWithFormat:@formatString, ##__VA_ARGS__].UTF8String); \
+ } else { \
+ os_log_error(ETCoreMLErrorUtils.loggingChannel, \
+ formatString ", with error= %@.", \
+ ##__VA_ARGS__, \
+ (error).localizedDescription); \
+ }
#pragma clang diagnostic pop
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
index 0f8a440c858..9bf3183e65a 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.h
@@ -6,12 +6,18 @@
// Please refer to the license found in the LICENSE file in the root directory of the source tree.
#import
+#import
NS_ASSUME_NONNULL_BEGIN
@class ETCoreMLAsset;
+namespace executorchcoreml {
+class MultiArray;
+}
+
/// Represents a ML model, the class is a thin wrapper over `MLModel` with additional properties.
+__attribute__((objc_subclassing_restricted))
@interface ETCoreMLModel : NSObject
- (instancetype)init NS_UNAVAILABLE;
@@ -31,6 +37,12 @@ NS_ASSUME_NONNULL_BEGIN
orderedOutputNames:(NSOrderedSet*)orderedOutputNames
error:(NSError* __autoreleasing*)error NS_DESIGNATED_INITIALIZER;
+- (nullable NSArray*)prepareInputs:(const std::vector&)inputs
+ error:(NSError* __autoreleasing*)error;
+
+- (nullable NSArray*)prepareOutputBackings:(const std::vector&)outputs
+ error:(NSError* __autoreleasing*)error;
+
/// The underlying MLModel.
@property (strong, readonly, nonatomic) MLModel* mlModel;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
index 791fb7c03b6..ee7218bd271 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModel.mm
@@ -8,6 +8,164 @@
#import
#import
+#import
+#import
+#import
+#import
+
+#pragma mark - ETCoreMLMultiArrayDescriptor
+__attribute__((objc_subclassing_restricted))
+@interface ETCoreMLMultiArrayDescriptor: NSObject
+
+- (instancetype)init NS_UNAVAILABLE;
+
++ (instancetype)new NS_UNAVAILABLE;
+
+- (instancetype)initWithShape:(NSArray *)shape
+ dataType:(MLMultiArrayDataType)dataType NS_DESIGNATED_INITIALIZER;
+
+@property (copy, readonly, nonatomic) NSArray *shape;
+
+@property (assign, readonly, nonatomic) MLMultiArrayDataType dataType;
+
+@end
+
+@implementation ETCoreMLMultiArrayDescriptor
+
+- (instancetype)initWithShape:(NSArray *)shape
+ dataType:(MLMultiArrayDataType)dataType {
+ self = [super init];
+ if (self) {
+ _shape = shape;
+ _dataType = dataType;
+ }
+
+ return self;
+}
+
+- (BOOL)isEqual:(id)object {
+ if (object == self) {
+ return YES;
+ }
+
+ if (![object isKindOfClass:self.class]) {
+ return NO;
+ }
+
+ ETCoreMLMultiArrayDescriptor *other = (ETCoreMLMultiArrayDescriptor *)object;
+ return [self.shape isEqualToArray:other.shape] && self.dataType == other.dataType;
+}
+
+- (NSUInteger)hash {
+ return [self.shape hash] ^ (NSUInteger)self.dataType;
+}
+
+- (instancetype)copyWithZone:(NSZone *)zone {
+ return [[ETCoreMLMultiArrayDescriptor allocWithZone:zone] initWithShape:self.shape
+ dataType:self.dataType];
+}
+
+@end
+
+namespace {
+
+using namespace executorchcoreml;
+
+size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
+ switch (data_type) {
+ case MLMultiArrayDataTypeFloat16: {
+ return 2;
+ }
+ case MLMultiArrayDataTypeFloat32: {
+ return 4;
+ }
+ case MLMultiArrayDataTypeInt32: {
+ return 4;
+ }
+ case MLMultiArrayDataTypeFloat64: {
+ return 8;
+ }
+ default: {
+ return 0;
+ }
+ }
+}
+
+std::vector calculate_strides(const std::vector& shape) {
+ if (shape.size() == 0) {
+ return {};
+ }
+
+ if (shape.size() == 1) {
+ return {1};
+ }
+
+ std::vector strides(shape.size(), 1);
+ size_t product = 1;
+ for (size_t i = shape.size(); i > 0; i--) {
+ strides[i - 1] = product;
+ product *= shape[i - 1];
+ }
+
+ return strides;
+}
+
+MLMultiArray * _Nullable make_ml_multi_array(const std::vector& shape,
+ MLMultiArrayDataType dataType,
+ NSCache *cache,
+ NSError * __autoreleasing *error) {
+ ETCoreMLMultiArrayDescriptor *descriptor = [[ETCoreMLMultiArrayDescriptor alloc] initWithShape:to_array(shape)
+ dataType:dataType];
+ // Check the cache first otherwise allocate a new backing storage.
+ NSMutableData *backing_storage = [cache objectForKey:descriptor];
+ if (backing_storage) {
+ [cache removeObjectForKey:descriptor];
+ } else {
+ size_t n = std::accumulate(shape.cbegin(), shape.cend(), 1, std::multiplies{});
+ backing_storage = [[NSMutableData alloc] initWithLength:n * get_number_of_bytes(dataType)];
+ }
+
+ __weak NSCache *weakCache = cache;
+ // Add the storage back to the cache when it gets deallocated, the next prediction would use the same storage.
+ MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:backing_storage.mutableBytes
+ shape:descriptor.shape
+ dataType:descriptor.dataType
+ strides:to_array(calculate_strides(shape))
+ deallocator:^(void * _Nonnull bytes) {[weakCache setObject:backing_storage forKey:descriptor];}
+ error:error];
+
+ return result;
+}
+
+NSDictionary *
+get_multi_array_constraints_by_name(NSDictionary *feature_descriptions) {
+ NSMutableDictionary *result = [NSMutableDictionary dictionaryWithCapacity:feature_descriptions.count];
+ [feature_descriptions enumerateKeysAndObjectsUsingBlock:^(NSString *key, MLFeatureDescription *description, BOOL * _Nonnull stop) {
+ result[key] = description.multiArrayConstraint;
+ }];
+
+ return result;
+}
+
+NSDictionary *get_multi_array_input_constraints_by_name(MLModelDescription *description) {
+ return get_multi_array_constraints_by_name(description.inputDescriptionsByName);
+}
+
+NSDictionary *get_multi_array_output_constraints_by_name(MLModelDescription *description) {
+ return get_multi_array_constraints_by_name(description.outputDescriptionsByName);
+}
+
+}
+
+#pragma mark - ETCoreMLModel
+@interface ETCoreMLModel ()
+
+@property (strong, readonly, nonatomic) NSCache *cache;
+@property (copy, readonly, nonatomic) NSDictionary *inputConstraintsByName;
+@property (copy, readonly, nonatomic) NSDictionary *outputConstraintsByName;
+
+@end
+
@implementation ETCoreMLModel
@@ -33,8 +191,11 @@ - (nullable instancetype)initWithAsset:(ETCoreMLAsset *)asset
_asset = asset;
_orderedInputNames = [orderedInputNames copy];
_orderedOutputNames = [orderedOutputNames copy];
+ _cache = [[NSCache alloc] init];
+ _inputConstraintsByName = get_multi_array_input_constraints_by_name(mlModel.modelDescription);
+ _outputConstraintsByName = get_multi_array_output_constraints_by_name(mlModel.modelDescription);
}
-
+
return self;
}
@@ -42,4 +203,73 @@ - (NSString *)identifier {
return self.asset.identifier;
}
+- (nullable NSArray *)prepareArgs:(const std::vector&)args
+ argNames:(NSOrderedSet *)argNames
+ argConstraintsByName:(NSDictionary *)argConstraintsByName
+ copyData:(const BOOL)copyData
+ error:(NSError * __autoreleasing *)error {
+ NSEnumerator *nameEnumerator = [argNames objectEnumerator];
+ NSMutableArray *result = [NSMutableArray arrayWithCapacity:args.size()];
+ for (const auto& arg : args) {
+ BOOL lCopyData = copyData;
+ NSString *argName = [nameEnumerator nextObject];
+ MLMultiArrayConstraint *constraint = argConstraintsByName[argName];
+ const auto& layout = arg.layout();
+ auto dataType = to_ml_multiarray_data_type(layout.dataType());
+ MLMultiArray *multiArrayArg = nil;
+ if (dataType == constraint.dataType) {
+ // We can use the same data storage.
+ multiArrayArg = [[MLMultiArray alloc] initWithDataPointer:arg.data()
+ shape:to_array(layout.shape())
+ dataType:constraint.dataType
+ strides:to_array(layout.strides())
+ deallocator:^(void * _Nonnull bytes) {}
+ error:error];
+ lCopyData = NO;
+ } else {
+ // We can't use the same data storage, data types are not the same.
+ multiArrayArg = ::make_ml_multi_array(layout.shape(), constraint.dataType, self.cache, error);
+ }
+
+ if (!multiArrayArg) {
+ return nil;
+ }
+
+ if (multiArrayArg && lCopyData) {
+ [multiArrayArg getMutableBytesWithHandler:^(void *_Nonnull mutableBytes,
+ NSInteger __unused size,
+ NSArray *strides) {
+ MultiArray buffer(mutableBytes, MultiArray::MemoryLayout(to_multiarray_data_type(constraint.dataType).value(),
+ layout.shape(),
+ to_vector(strides)));
+ arg.copy(buffer);
+ }];
+ }
+
+ [result addObject:multiArrayArg];
+ }
+
+ return result;
+}
+
+- (nullable NSArray *)prepareInputs:(const std::vector&)inputs
+ error:(NSError * __autoreleasing *)error {
+ return [self prepareArgs:inputs
+ argNames:self.orderedInputNames
+ argConstraintsByName:self.inputConstraintsByName
+ copyData:YES
+ error:error];
+
+}
+
+- (nullable NSArray *)prepareOutputBackings:(const std::vector&)outputs
+ error:(NSError * __autoreleasing *)error {
+ return [self prepareArgs:outputs
+ argNames:self.orderedOutputNames
+ argConstraintsByName:self.outputConstraintsByName
+ copyData:NO
+ error:error];
+
+}
+
@end
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.h
index 3a3578e06ab..f846ebbb969 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.h
@@ -9,7 +9,8 @@
NS_ASSUME_NONNULL_BEGIN
/// A class responsible for compiling a CoreML model.
-__attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelCompiler : NSObject
+__attribute__((objc_subclassing_restricted))
+@interface ETCoreMLModelCompiler : NSObject
+ (instancetype)new NS_UNAVAILABLE;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
index e6e329c9ddd..2f1b22f456b 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelExecutor.h
@@ -35,6 +35,9 @@ NS_ASSUME_NONNULL_BEGIN
/// The model.
@property (readonly, strong, nonatomic) ETCoreMLModel* model;
+/// If set to `YES`, output backings are ignored.
+@property (readwrite, atomic) BOOL ignoreOutputBackings;
+
@end
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.h
index 3d2e1006329..05e96ad59f5 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelLoader.h
@@ -16,7 +16,8 @@ struct ModelMetadata;
NS_ASSUME_NONNULL_BEGIN
/// A class responsible for loading a CoreML model.
-__attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelLoader : NSObject
+__attribute__((objc_subclassing_restricted))
+@interface ETCoreMLModelLoader : NSObject
+ (instancetype)new NS_UNAVAILABLE;
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
index fb616c71527..394cff4f897 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.h
@@ -7,11 +7,14 @@
#import
+#import
+
NS_ASSUME_NONNULL_BEGIN
namespace executorchcoreml {
struct ModelLoggingOptions;
class ModelEventLogger;
+class MultiArray;
};
@class ETCoreMLModel;
@@ -20,7 +23,8 @@ class ModelEventLogger;
typedef void ModelHandle;
/// A class responsible for managing the models loaded by the delegate.
-__attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelManager : NSObject
+__attribute__((objc_subclassing_restricted))
+@interface ETCoreMLModelManager : NSObject
+ (instancetype)new NS_UNAVAILABLE;
@@ -49,7 +53,7 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelManager : N
/// Executes the loaded model.
///
/// @param handle The handle to the loaded model.
-/// @param args The arguments to the model.
+/// @param args The arguments (inputs and outputs) of the model.
/// @param loggingOptions The model logging options.
/// @param error On failure, error is filled with the failure information.
/// @retval `YES` if the execution succeeded otherwise `NO`.
@@ -59,6 +63,19 @@ __attribute__((objc_subclassing_restricted)) @interface ETCoreMLModelManager : N
eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
error:(NSError* __autoreleasing*)error;
+/// Executes the loaded model.
+///
+/// @param handle The handle to the loaded model.
+/// @param argsVec The arguments (inputs and outputs) of the model.
+/// @param loggingOptions The model logging options.
+/// @param error On failure, error is filled with the failure information.
+/// @retval `YES` if the execution succeeded otherwise `NO`.
+- (BOOL)executeModelWithHandle:(ModelHandle*)handle
+ argsVec:(const std::vector&)argsVec
+ loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+ eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+ error:(NSError* __autoreleasing*)error;
+
/// Unloads the loaded model.
///
/// @param handle The handle to the loaded model.
diff --git a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
index 1c0d2a30f97..c51de9d1e14 100644
--- a/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
+++ b/backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm
@@ -22,6 +22,8 @@
#import
#import
#import
+#import
+#import
#import
#import
#import
@@ -98,32 +100,60 @@ BOOL is_backed_by_same_buffer(MLMultiArray *array1, MLMultiArray *array2) {
return options;
}
-BOOL copy(MLMultiArray *src, MLMultiArray *dst, NSError * __autoreleasing *error) {
- if (![src.shape isEqualToArray:dst.shape]) {
- ETCoreMLLogErrorAndSetNSError(error, 0, "%@: Model is broken", NSStringFromClass(ETCoreMLModelManager.class));
- return NO;
- }
+void copy(MLMultiArray *src, MLMultiArray *dst) {
if (::is_backed_by_same_buffer(src, dst)) {
- return YES;
- }
- @autoreleasepool {
- [src copyInto:dst];
+ return;
}
- return YES;
+
+ [src copyInto:dst];
}
-BOOL set_outputs(NSArray *outputs,
- NSArray *model_outputs,
- NSError * __autoreleasing *error) {
+void set_outputs(NSArray *outputs, NSArray *model_outputs) {
NSEnumerator *enumerator = [model_outputs objectEnumerator];
for (MLMultiArray *output in outputs) {
MLMultiArray *model_output = [enumerator nextObject];
- if (!::copy(output, model_output, error)) {
- return NO;
+ ::copy(model_output, output);
+ }
+}
+
+std::optional get_data_type(MLMultiArrayDataType data_type) {
+ switch (data_type) {
+ case MLMultiArrayDataTypeFloat16: {
+ return MultiArray::DataType::Float16;
+ }
+ case MLMultiArrayDataTypeFloat32: {
+ return MultiArray::DataType::Float32;
+ }
+ case MLMultiArrayDataTypeFloat64: {
+ return MultiArray::DataType::Float64;
+ }
+ case MLMultiArrayDataTypeInt32: {
+ return MultiArray::DataType::Int32;
+ }
+ default: {
+ return std::nullopt;
}
}
-
- return YES;
+}
+
+void copy(MLMultiArray *src, executorchcoreml::MultiArray& dst) {
+ [src getBytesWithHandler:^(const void * _Nonnull bytes, NSInteger size) {
+ if (bytes == dst.data()) {
+ return;
+ }
+
+ MultiArray::MemoryLayout src_layout(get_data_type(src.dataType).value(), to_vector(src.shape), to_vector(src.strides));
+ MultiArray(const_cast(bytes), std::move(src_layout)).copy(dst);
+ }];
+}
+
+void set_outputs(std::vector& outputs,
+ NSArray *model_outputs) {
+ NSEnumerator *enumerator = [model_outputs objectEnumerator];
+ for (auto& output : outputs) {
+ MLMultiArray *model_output = [enumerator nextObject];
+ ::copy(model_output, output);
+ }
}
NSData * _Nullable get_file_data(const inmemoryfs::InMemoryFileSystem *inMemoryFS,
@@ -313,6 +343,7 @@ void add_compute_unit(std::string& identifier, MLComputeUnits compute_units) {
return result;
}
+
#endif
} //namespace
@@ -467,7 +498,7 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
return [[ETCoreMLModelAnalyzer alloc] initWithCompiledModelAsset:compiledModelAsset
modelAsset:modelAsset
metadata:metadata
- operationPathToDebugSymbolMap: operation_path_to_symbol_name_map
+ operationPathToDebugSymbolMap:operation_path_to_symbol_name_map
configuration:configuration
assetManager:self.assetManager
error:error];
@@ -641,6 +672,48 @@ - (void)addPrewarmedAsset:(ETCoreMLAsset *)asset {
os_unfair_lock_unlock(&_lock);
}
+- (nullable NSArray *)executeModelUsingExecutor:(id)executor
+ inputs:(NSArray *)inputs
+ outputBackings:(NSArray *)outputBackings
+ loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+ eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+ error:(NSError * __autoreleasing *)error {
+ NSError *localError = nil;
+ ETCoreMLModel *model = executor.model;
+ MLPredictionOptions *predictionOptions = ::get_prediction_options(outputBackings, model.orderedOutputNames, error);
+ if (!predictionOptions) {
+ return nil;
+ }
+
+ id inputFeatures = ::get_feature_provider(inputs, model.orderedInputNames, error);
+ if (!inputFeatures) {
+ return nil;
+ }
+
+ NSArray *modelOutputs = [executor executeModelWithInputs:inputFeatures
+ predictionOptions:predictionOptions
+ loggingOptions:loggingOptions
+ eventLogger:eventLogger
+ error:&localError];
+ // Clear the error and mark output backings to be ignored before the re-execution below. NOTE(review): the re-execute at the next statement is OUTSIDE this `if`, so even a successful first prediction runs a second time — the retry call likely belongs inside these braces. Confirm intent.
+ if (!modelOutputs && predictionOptions.outputBackings.count > 0) {
+ localError = nil;
+ executor.ignoreOutputBackings = YES;
+ }
+
+ modelOutputs = [executor executeModelWithInputs:inputFeatures
+ predictionOptions:predictionOptions
+ loggingOptions:loggingOptions
+ eventLogger:eventLogger
+ error:&localError];
+
+ if (error) {
+ *error = localError;
+ }
+
+ return modelOutputs;
+}
+
- (BOOL)executeModelWithHandle:(ModelHandle *)handle
args:(NSArray *)args
loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
@@ -659,33 +732,91 @@ - (BOOL)executeModelWithHandle:(ModelHandle *)handle
if (args.count != model.orderedInputNames.count + model.orderedOutputNames.count) {
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCorruptedModel,
- "%@: Model is invalid.",
- NSStringFromClass(self.class));
+ "%@: Model is invalid, expected args count to be %lu but got %lu.",
+ NSStringFromClass(self.class),
+ static_cast(model.orderedInputNames.count + model.orderedOutputNames.count),
+ args.count);
return NO;
}
-
- NSArray *inputs = [args subarrayWithRange:NSMakeRange(0, model.orderedInputNames.count)];
- NSArray *outputs = [args subarrayWithRange:NSMakeRange(model.orderedInputNames.count, args.count - model.orderedInputNames.count)];
- id inputFeatures = ::get_feature_provider(inputs, model.orderedInputNames, error);
- if (!inputFeatures) {
- return NO;
+ @autoreleasepool {
+ NSArray *inputs = [args subarrayWithRange:NSMakeRange(0, model.orderedInputNames.count)];
+ NSArray *outputs = [args subarrayWithRange:NSMakeRange(model.orderedInputNames.count, args.count - model.orderedInputNames.count)];
+ NSArray *outputBackings = @[];
+ if (executor.ignoreOutputBackings == NO) {
+ outputBackings = outputs;
+ }
+
+ NSArray *modelOutputs = [self executeModelUsingExecutor:executor
+ inputs:inputs
+ outputBackings:outputBackings
+ loggingOptions:loggingOptions
+ eventLogger:eventLogger
+ error:error];
+ if (!modelOutputs) {
+ return NO;
+ }
+
+ ::set_outputs(outputs, modelOutputs);
}
- MLPredictionOptions *predictionOptions = ::get_prediction_options(outputs, model.orderedOutputNames, error);
- if (!predictionOptions) {
+ return YES;
+}
+
+- (BOOL)executeModelWithHandle:(ModelHandle *)handle
+ argsVec:(const std::vector&)argsVec
+ loggingOptions:(const executorchcoreml::ModelLoggingOptions&)loggingOptions
+ eventLogger:(const executorchcoreml::ModelEventLogger* _Nullable)eventLogger
+ error:(NSError * __autoreleasing *)error {
+ id executor = [self executorWithHandle:handle];
+ if (!executor) {
+ ETCoreMLLogErrorAndSetNSError(error,
+ 0,
+ "%@: Model is already unloaded.",
+ NSStringFromClass(self.class));
return NO;
}
- NSArray *modelOutputs = [executor executeModelWithInputs:inputFeatures
- predictionOptions:predictionOptions
- loggingOptions:loggingOptions
- eventLogger:eventLogger
- error:error];
- if (!outputs) {
+ ETCoreMLModel *model = executor.model;
+ if (argsVec.size() != model.orderedInputNames.count + model.orderedOutputNames.count) {
+ ETCoreMLLogErrorAndSetNSError(error,
+ ETCoreMLErrorCorruptedModel,
+ "%@: Model is invalid, expected args count to be %lu but got %lu.",
+ NSStringFromClass(self.class),
+ static_cast(model.orderedInputNames.count + model.orderedOutputNames.count),
+ argsVec.size());
return NO;
}
- return ::set_outputs(outputs, modelOutputs, error);
+ std::vector inputArgs(argsVec.begin(), argsVec.begin() + model.orderedInputNames.count);
+ std::vector outputArgs(argsVec.begin() + model.orderedInputNames.count, argsVec.end());
+ @autoreleasepool {
+ NSArray *inputs = [model prepareInputs:inputArgs error:error];
+ if (!inputs) {
+ return NO;
+ }
+
+ NSArray *outputBackings = @[];
+ if (executor.ignoreOutputBackings == NO) {
+ outputBackings = [model prepareOutputBackings:outputArgs error:error];
+ }
+
+ if (!outputBackings) {
+ return NO;
+ }
+
+ NSArray *modelOutputs = [self executeModelUsingExecutor:executor
+ inputs:inputs
+ outputBackings:outputBackings
+ loggingOptions:loggingOptions
+ eventLogger:eventLogger
+ error:error];
+ if (!modelOutputs) {
+ return NO;
+ }
+
+ ::set_outputs(outputArgs, modelOutputs);
+ return YES;
+ }
}
- (BOOL)unloadModelWithHandle:(ModelHandle *)handle {
diff --git a/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm b/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm
index 4aa5fffe94a..b8a10fcbbbc 100644
--- a/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm
+++ b/backends/apple/coreml/runtime/delegate/MLMultiArray_Copy.mm
@@ -7,55 +7,17 @@
#import
+#import
#import
namespace {
using namespace executorchcoreml;
-template
-T toValue(NSNumber *value);
-
-template<> size_t toValue(NSNumber *value) {
- return value.unsignedLongValue;
-}
-
-template<> ssize_t toValue(NSNumber *value) {
- return value.longLongValue;
-}
-
-template::value, T>::type>
-std::vector to_vector(NSArray *numbers) {
- std::vector result;
- result.reserve(numbers.count);
- for (NSNumber *number in numbers) {
- result.emplace_back(toValue(number));
- }
-
- return result;
-}
-
-MultiArray::DataType to_multi_array_data_type(MLMultiArrayDataType data_type) {
- switch (data_type) {
- case MLMultiArrayDataTypeInt32: {
- return MultiArray::DataType::Int;
- }
- case MLMultiArrayDataTypeFloat: {
- return MultiArray::DataType::Float;
- }
- case MLMultiArrayDataTypeFloat16: {
- return MultiArray::DataType::Float16;
- }
- case MLMultiArrayDataTypeDouble: {
- return MultiArray::DataType::Double;
- }
- }
-}
-
MultiArray to_multi_array(void *data,
MLMultiArrayDataType dataType,
NSArray *shape,
NSArray *strides) {
- auto layout = MultiArray::MemoryLayout(to_multi_array_data_type(dataType),
+ auto layout = MultiArray::MemoryLayout(to_multiarray_data_type(dataType).value(),
to_vector(shape),
to_vector(strides));
return MultiArray(data, std::move(layout));
diff --git a/backends/apple/coreml/runtime/delegate/backend_delegate.h b/backends/apple/coreml/runtime/delegate/backend_delegate.h
index d6a6016c087..ed921fb35bd 100644
--- a/backends/apple/coreml/runtime/delegate/backend_delegate.h
+++ b/backends/apple/coreml/runtime/delegate/backend_delegate.h
@@ -26,7 +26,7 @@ class BackendDelegate {
struct Config {
// Max models cache size in bytes.
- size_t max_models_cache_size = 2 * size_t(1024) * size_t(1024) * size_t(1024);
+ size_t max_models_cache_size = 10 * size_t(1024) * size_t(1024) * size_t(1024);
// If set to `true`, delegate pre-warms the most recently used asset.
bool should_prewarm_asset = true;
// If set to `true`, delegate pre-warms the model in `init`.
diff --git a/backends/apple/coreml/runtime/delegate/backend_delegate.mm b/backends/apple/coreml/runtime/delegate/backend_delegate.mm
index b91a6208b6a..1ded4a76b3b 100644
--- a/backends/apple/coreml/runtime/delegate/backend_delegate.mm
+++ b/backends/apple/coreml/runtime/delegate/backend_delegate.mm
@@ -44,44 +44,6 @@ MLComputeUnits get_compute_units(const Buffer& buffer) {
return configuration;
}
-template::value, T>::type>
-NSArray *to_array(const std::vector& array) {
- NSMutableArray *result = [NSMutableArray arrayWithCapacity:array.size()];
- for (T value : array) {
- [result addObject:@(value)];
- }
-
- return result;
-}
-
-MLMultiArrayDataType get_data_type(MultiArray::DataType dataType) {
- switch (dataType) {
- case MultiArray::DataType::Float16: {
- return MLMultiArrayDataTypeFloat16;
- }
- case MultiArray::DataType::Float: {
- return MLMultiArrayDataTypeFloat32;
- }
- case MultiArray::DataType::Double: {
- return MLMultiArrayDataTypeDouble;
- }
- case MultiArray::DataType::Int: {
- return MLMultiArrayDataTypeInt32;
- }
- }
-}
-
-MLMultiArray * _Nullable to_ml_multiarray(const MultiArray& array, NSError * __autoreleasing *error) {
- const auto& layout = array.layout();
- MLMultiArray *result = [[MLMultiArray alloc] initWithDataPointer:array.data()
- shape:to_array(layout.shape())
- dataType:get_data_type(layout.dataType())
- strides:to_array(layout.strides())
- deallocator:^(void * _Nonnull bytes) {}
- error:error];
- return result;
-}
-
NSURL * _Nullable create_directory_if_needed(NSURL *url,
NSFileManager *fileManager,
NSError * __autoreleasing *error) {
@@ -194,17 +156,8 @@ bool execute(Handle* handle,
ModelEventLogger *event_logger,
std::error_code& ec) const noexcept override {
NSError *error = nil;
- NSMutableArray *model_args = [NSMutableArray arrayWithCapacity:args.size()];
- for (const auto& arg : args) {
- MLMultiArray *multi_array = to_ml_multiarray(arg, &error);
- if (!multi_array) {
- return false;
- }
- [model_args addObject:multi_array];
- }
-
if (![model_manager_ executeModelWithHandle:handle
- args:model_args
+ argsVec:args
loggingOptions:logging_options
eventLogger:event_logger
error:&error]) {
diff --git a/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist b/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist
index 7dd12acaaf8..df37a47755f 100644
--- a/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist
+++ b/backends/apple/coreml/runtime/delegate/com.apple.executorchcoreml_config.plist
@@ -7,6 +7,6 @@
shouldPrewarmModel
maxAssetsSizeInBytes
- 2147483648
+ 1073741824
diff --git a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
index a51e73ee68d..b672d4a08e4 100644
--- a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
+++ b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
@@ -28,16 +28,25 @@
using namespace executorchcoreml;
std::optional get_data_type(ScalarType scalar_type) {
- if (scalar_type == ScalarType::Float) {
- return MultiArray::DataType::Float;
- } else if (scalar_type == ScalarType::Double) {
- return MultiArray::DataType::Double;
- } else if (scalar_type == ScalarType::Half) {
- return MultiArray::DataType::Float16;
- } else if (scalar_type == ScalarType::Int) {
- return MultiArray::DataType::Int;
- } else {
- return std::nullopt;
+ switch (scalar_type) {
+ case ScalarType::Bool:
+ return MultiArray::DataType::Bool;
+ case ScalarType::Byte:
+ return MultiArray::DataType::Byte;
+ case ScalarType::Short:
+ return MultiArray::DataType::Short;
+ case ScalarType::Int:
+ return MultiArray::DataType::Int32;
+ case ScalarType::Long:
+ return MultiArray::DataType::Int64;
+ case ScalarType::Half:
+ return MultiArray::DataType::Float16;
+ case ScalarType::Float:
+ return MultiArray::DataType::Float32;
+ case ScalarType::Double:
+ return MultiArray::DataType::Float64;
+ default:
+ return std::nullopt;
}
}
@@ -54,6 +63,7 @@
auto tensor = eValue->toTensor();
auto dataType = get_data_type(tensor.scalar_type());
if (!dataType.has_value()) {
+ ET_LOG(Error, "%s: DataType=%d is not supported", ETCoreMLStrings.delegateIdentifier.UTF8String, (int)tensor.scalar_type());
return std::nullopt;
}
@@ -167,7 +177,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
auto multi_array = get_multi_array(args[i], ArgType::Input);
ET_CHECK_OR_RETURN_ERROR(multi_array.has_value(),
Internal,
- "%s: Expected tensor at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
+ "%s: Failed to create multiarray from input at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
delegate_args.emplace_back(std::move(multi_array.value()));
}
@@ -176,7 +186,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
auto multi_array = get_multi_array(args[i], ArgType::Output);
ET_CHECK_OR_RETURN_ERROR(multi_array.has_value(),
Internal,
- "%s: Expected tensor at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
+ "%s: Failed to create multiarray from output at args[%zu]", ETCoreMLStrings.delegateIdentifier.UTF8String, i);
delegate_args.emplace_back(std::move(multi_array.value()));
}
diff --git a/backends/apple/coreml/runtime/delegate/multiarray.h b/backends/apple/coreml/runtime/delegate/multiarray.h
index cd165373dc8..70a2a08a2f7 100644
--- a/backends/apple/coreml/runtime/delegate/multiarray.h
+++ b/backends/apple/coreml/runtime/delegate/multiarray.h
@@ -7,6 +7,9 @@
#pragma once
+#import
+#import
+#import
#import
namespace executorchcoreml {
@@ -29,13 +32,33 @@ class Buffer {
};
/// A class representing a MultiArray.
-class MultiArray {
+class MultiArray final {
public:
/// The MultiArray datatype.
- enum class DataType : uint8_t { Int = 0, Double, Float, Float16 };
+ enum class DataType : uint8_t {
+ Bool = 0,
+ Byte,
+ Char,
+ Short,
+ Int32,
+ Int64,
+ Float16,
+ Float32,
+ Float64,
+ };
+
+ /// Options for copying.
+ struct CopyOptions {
+ inline CopyOptions() noexcept : use_bnns(true), use_memcpy(true) { }
+
+ inline CopyOptions(bool use_bnns, bool use_memcpy) noexcept : use_bnns(use_bnns), use_memcpy(use_memcpy) { }
+
+ bool use_bnns = true;
+ bool use_memcpy = true;
+ };
/// A class describing the memory layout of a MultiArray.
- class MemoryLayout {
+ class MemoryLayout final {
public:
MemoryLayout(DataType dataType, std::vector shape, std::vector strides)
: dataType_(dataType), shape_(std::move(shape)), strides_(std::move(strides)) { }
@@ -53,7 +76,10 @@ class MultiArray {
inline size_t rank() const noexcept { return shape_.size(); }
/// Returns the number of elements in the MultiArray.
- size_t get_num_elements() const noexcept;
+ size_t num_elements() const noexcept;
+
+ /// Returns the byte size of an element.
+ size_t num_bytes() const noexcept;
/// Returns `true` if the memory layout is packed otherwise `false`.
bool is_packed() const noexcept;
@@ -78,11 +104,42 @@ class MultiArray {
/// Copies this into another `MultiArray`.
///
/// @param dst The destination `MultiArray`.
- bool copy(MultiArray& dst) const noexcept;
+ void copy(MultiArray& dst, CopyOptions options = CopyOptions()) const noexcept;
+
+ /// Get the value at `indices`.
+ template inline T value(const std::vector& indices) const noexcept {
+ return *(static_cast(data(indices)));
+ }
+
+ /// Set the value at `indices`.
+ template inline void set_value(const std::vector& indices, T value) const noexcept {
+ T* ptr = static_cast(data(indices));
+ *ptr = value;
+ }
+
+ /// Get the value at `index`.
+ template inline T value(size_t index) const noexcept { return *(static_cast(data(index))); }
+
+ /// Set the value at `index`.
+ template inline void set_value(size_t index, T value) const noexcept {
+ T* ptr = static_cast(data(index));
+ *ptr = value;
+ }
private:
+ void* data(const std::vector& indices) const noexcept;
+
+ void* data(size_t index) const noexcept;
+
void* data_;
MemoryLayout layout_;
};
+/// Converts `MultiArray::DataType` to `MLMultiArrayDataType`.
+std::optional to_ml_multiarray_data_type(MultiArray::DataType data_type);
+
+/// Converts `MLMultiArrayDataType` to `MultiArray::DataType`.
+std::optional to_multiarray_data_type(MLMultiArrayDataType data_type);
+
+
} // namespace executorchcoreml
diff --git a/backends/apple/coreml/runtime/delegate/multiarray.mm b/backends/apple/coreml/runtime/delegate/multiarray.mm
index 3b8dcb98a30..74996fb8d5a 100644
--- a/backends/apple/coreml/runtime/delegate/multiarray.mm
+++ b/backends/apple/coreml/runtime/delegate/multiarray.mm
@@ -10,120 +10,16 @@
#import
#import
-
#import
#import
+#import
+#import
#import
namespace {
using namespace executorchcoreml;
-template
-struct TypedMultiArray {
- explicit TypedMultiArray(T *data, MultiArray::MemoryLayout layout) noexcept
- :data(data), layout(std::move(layout))
- {}
-
- T *data;
- MultiArray::MemoryLayout layout;
-};
-
-#pragma mark - BNNS
-
-template
-struct BNNSCopier {
- static bool supported() noexcept {
- return false;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dstNNSDesc) noexcept {}
-};
-
-// float -> _Float16
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat32;
- dst_bnns_desc->data_type = BNNSDataTypeFloat16;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// float -> int32_t
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat32;
- dst_bnns_desc->data_type = BNNSDataTypeInt32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// _Float16 -> float
-template<>
-struct BNNSCopier<_Float16, float> {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat16;
- dst_bnns_desc->data_type = BNNSDataTypeFloat32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// _Float16 -> int32_t
-template<>
-struct BNNSCopier<_Float16, int32_t> {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeFloat16;
- dst_bnns_desc->data_type = BNNSDataTypeInt32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// int32_t -> _Float16
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeInt32;
- dst_bnns_desc->data_type = BNNSDataTypeFloat16;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-// int32_t -> float
-template<>
-struct BNNSCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *dst_bnns_desc) noexcept {
- src_bnns_desc->data_type = BNNSDataTypeInt32;
- dst_bnns_desc->data_type = BNNSDataTypeFloat32;
- BNNSCopy(src_bnns_desc, dst_bnns_desc, NULL);
- }
-};
-
-/// Returns BNNSDataLayout and sets strides from the multi-array strides.
+/// Returns BNNSDataLayout and sets strides from the multi-array strides.
///
/// BNNS requires strides to be non-decreasing order;
/// `bnns_strides[i] <= bnns_strides[i + 1]`. BNNSDataLayout defines
@@ -132,408 +28,491 @@ static void copy(BNNSNDArrayDescriptor *src_bnns_desc, BNNSNDArrayDescriptor *ds
/// @param multi_array_strides The multiarray strides.
/// @param bnns_strides The bnns strides.
/// @retval The `BNNSDataLayout`.
-BNNSDataLayout get_bnns_data_layout(const std::vector& multi_array_strides, size_t *bnns_strides) {
- uint32_t firstMajorFlag = 1;
+std::optional get_bnns_data_layout(const std::vector& multi_array_strides,
+ size_t *bnns_strides) {
+ bool first_major = false;
uint32_t rank = static_cast(multi_array_strides.size());
if (rank > BNNS_MAX_TENSOR_DIMENSION) {
- return (BNNSDataLayout)-1;
+ return std::nullopt;
}
if (std::is_sorted(multi_array_strides.begin(), multi_array_strides.end(), std::less())) {
- firstMajorFlag = 0;
+ first_major = false;
std::copy(multi_array_strides.begin(), multi_array_strides.end(), bnns_strides);
} else if (std::is_sorted(multi_array_strides.begin(), multi_array_strides.end(), std::greater()) ) {
- firstMajorFlag = 1;
+ first_major = true;
std::copy(multi_array_strides.rbegin(), multi_array_strides.rend(), bnns_strides);
} else {
- return (BNNSDataLayout)-1;
+ return std::nullopt;
}
// See BNNSDataLayout's raw value how this bitwise-or makes sense.
- return (BNNSDataLayout)((rank << 16) | (8 << 12) | firstMajorFlag);
+ return (BNNSDataLayout) (0x08000 + // flags as canonical first/last major type
+ 0x10000 * rank + // set dimensionality
+ (first_major ? 1 : 0)); // set first/last major bit
}
-/// Initializes BNNSNDArrayDescriptor for the shape and strides.
+/// Returns `BNNSDataType` from `MultiArray::DataType`.
///
-/// @param layout The memory layout.
-/// @param desc The ``BNNSNDArrayDescriptor` to be initialized.
-/// @retval `true` if the initialization succeeded otherwise `false`.
-bool init_bnns_array_descriptor(const MultiArray::MemoryLayout& layout, BNNSNDArrayDescriptor *desc) {
- BNNSDataLayout bnns_layout = get_bnns_data_layout(layout.strides(), desc->stride);
- if (bnns_layout == (BNNSDataLayout)-1) {
- return false;
- }
-
- std::memset(desc, 0, sizeof(*desc));
- const auto& shape = layout.shape();
- std::copy(shape.begin(), shape.end(), desc->size);
- desc->layout = bnns_layout;
- desc->data_scale = 1.0f;
- desc->data_bias = 0.0f;
-
- return true;
-}
-
-template
-struct MultiArrayBNNSCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!BNNSCopier::supported()) {
- return false;
+/// @param datatype The multiarray datatype.
+/// @retval The `BNNSDataType`.
+std::optional get_bnns_data_type(MultiArray::DataType datatype) {
+ switch (datatype) {
+ case MultiArray::DataType::Bool: {
+ return BNNSDataTypeBoolean;
}
-
- BNNSNDArrayDescriptor src_bnns_array;
- BNNSNDArrayDescriptor dst_bnns_array;
- if (!init_bnns_array_descriptor(src.layout, &src_bnns_array) || !init_bnns_array_descriptor(dst.layout, &dst_bnns_array)) {
- return false;
+ case MultiArray::DataType::Byte: {
+ return BNNSDataTypeUInt8;
+ }
+ case MultiArray::DataType::Char: {
+ return BNNSDataTypeInt8;
+ }
+ case MultiArray::DataType::Short: {
+ return BNNSDataTypeInt16;
+ }
+ case MultiArray::DataType::Int32: {
+ return BNNSDataTypeInt32;
+ }
+ case MultiArray::DataType::Int64: {
+ return BNNSDataTypeInt64;
+ }
+ case MultiArray::DataType::Float16: {
+ return BNNSDataTypeFloat16;
+ }
+ case MultiArray::DataType::Float32: {
+ return BNNSDataTypeFloat32;
+ }
+ default: {
+ return std::nullopt;
}
-
- BNNSCopier::copy(&src_bnns_array, &dst_bnns_array);
- return true;
}
-};
-
-#pragma mark - VImageCopier
+}
-bool init_vi_Buffer(const MultiArray::MemoryLayout& layout, vImage_Buffer *viBuf, size_t bytesPerScalar) {
- size_t rank = layout.rank();
- const auto& shape = layout.shape();
- const auto& strides = layout.strides();
-
- if (rank < 2) {
- // vImage path requires at least two dimensions.
- return false;
- }
-
- // vImage blitter requires first major and every dimension except row (shape[rank - 2]) is contiguous.
- if (!std::is_sorted(strides.begin(), strides.end(), std::greater())) {
+/// Initializes BNNS array descriptor from multi array.
+///
+/// @param bnns_descriptor The descriptor to be initialized.
+/// @param multi_array The multiarray.
+/// @retval `true` if the initialization succeeded otherwise `false`.
+bool init_bnns_descriptor(BNNSNDArrayDescriptor& bnns_descriptor, const MultiArray& multi_array) {
+ const auto& layout = multi_array.layout();
+ if (layout.num_elements() == 1) {
return false;
}
- if (strides[rank - 1] != 1) {
+ auto bnns_datatype = get_bnns_data_type(layout.dataType());
+ if (!bnns_datatype) {
return false;
}
- size_t height = std::accumulate(shape.begin(), shape.end() - 1, size_t(1), std::multiplies());
- if (height * strides[rank - 2] != strides[0] * shape[0]) {
+ std::memset(&bnns_descriptor, 0, sizeof(bnns_descriptor));
+ auto bnns_layout = get_bnns_data_layout(layout.strides(), bnns_descriptor.stride);
+ if (!bnns_layout) {
return false;
}
- size_t width = shape[rank - 1];
- size_t rowBytes = strides[rank - 2] * bytesPerScalar;
-
- viBuf->data = NULL;
- viBuf->height = height;
- viBuf->width = width;
- viBuf->rowBytes = rowBytes;
+ const auto& shape = layout.shape();
+ std::copy(shape.begin(), shape.end(), bnns_descriptor.size);
+ bnns_descriptor.layout = bnns_layout.value();
+ bnns_descriptor.data_scale = 1.0f;
+ bnns_descriptor.data_bias = 0.0f;
+ bnns_descriptor.data_type = bnns_datatype.value();
+ bnns_descriptor.data = multi_array.data();
return true;
}
-template
-struct VImageCopier {
- static bool supported() noexcept {
+bool copy_using_bnns(const MultiArray& src, MultiArray& dst) {
+ if (dst.layout().num_bytes() < src.layout().num_bytes()) {
return false;
}
-
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {}
-};
-
-template
-struct VImageCopier {
- static bool supported() noexcept {
- return true;
+ BNNSNDArrayDescriptor src_descriptor;
+ if (!init_bnns_descriptor(src_descriptor, src)) {
+ return false;
}
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {
- vImageCopyBuffer(src_vi_buffer, dst_vi_buffer, sizeof(T), kvImageDoNotTile);
- }
-};
-
-// float -> _Float16
-template <>
-struct VImageCopier {
- static bool supported() noexcept {
- return true;
+ BNNSNDArrayDescriptor dst_descriptor;
+ if (!init_bnns_descriptor(dst_descriptor, dst)) {
+ return false;
}
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {
- vImageConvert_PlanarFtoPlanar16F(src_vi_buffer, dst_vi_buffer, kvImageDoNotTile);
- }
-};
+ return BNNSCopy(&dst_descriptor, &src_descriptor, NULL) == 0;
+}
-// _Float16 -> float
-template <>
-struct VImageCopier<_Float16, float> {
- static bool supported() noexcept {
- return true;
- }
+std::vector get_layouts(const std::vector& arrays) {
+ std::vector result;
+ result.reserve(arrays.size());
- static void copy(vImage_Buffer *src_vi_buffer, vImage_Buffer *dst_vi_buffer) noexcept {
- vImageConvert_Planar16FtoPlanarF(src_vi_buffer, dst_vi_buffer, kvImageDoNotTile);
- }
-};
-
-template
-struct MultiArrayVImageCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!VImageCopier::supported()) {
- return false;
- }
-
- vImage_Buffer src_vi_buffer;
- vImage_Buffer dst_vi_buffer;
- if (!init_vi_Buffer(src.layout, &src_vi_buffer, sizeof(T1))) {
- return false;
- }
-
- if (!init_vi_Buffer(dst.layout, &dst_vi_buffer, sizeof(T2))) {
- return false;
- }
-
- VImageCopier::copy(&src_vi_buffer, &dst_vi_buffer);
- return true;
- }
-};
-
-#pragma mark - VDSPCopier
-
-template
-struct VDSPCopier {
- static bool supported() noexcept {
- return false;
- }
+ std::transform(arrays.begin(), arrays.end(), std::back_inserter(result), [](const auto& array) {
+ return array.layout();
+ });
- static void copy(const T1 *src_data, T2 *dst_data, size_t num_elements) noexcept {}
-};
+ return result;
+}
-// Double -> Float
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
- }
+std::vector get_datas(const std::vector& arrays) {
+ std::vector result;
+ result.reserve(arrays.size());
- static void copy(const double *src_data, float *dst_data, size_t num_elements) noexcept {
- vDSP_vdpsp(src_data, 1, dst_data, 1, num_elements);
- }
-};
-
-// Float -> Double
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
- }
+ std::transform(arrays.begin(), arrays.end(), std::back_inserter(result), [](const auto& array) {
+ return array.data();
+ });
- static void copy(const float *src_data, double *dst_data, size_t num_elements) noexcept {
- vDSP_vspdp(src_data, 1, dst_data, 1, num_elements);
- }
-};
+ return result;
+}
-// Float -> Int32
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
+// We can coalesce two adjacent dimensions if either dim has size 1 or if `shape[n] * stride[n] == stride[n + 1]`.
+bool can_coalesce_dimensions(const std::vector& shape,
+ const std::vector& strides,
+ size_t dim1,
+ size_t dim2) {
+ auto shape1 = shape[dim1];
+ auto shape2 = shape[dim2];
+ if (shape1 == 1 || shape2 == 1) {
return true;
}
- static void copy(const float *src_data, int32_t *dst_data, size_t num_elements) noexcept {
- vDSP_vfix32(src_data, 1, dst_data, 1, num_elements);
- }
-};
+ auto stride1 = strides[dim1];
+ auto stride2 = strides[dim2];
+ return shape1 * stride1 == stride2;
+}
-// Int32 -> Double
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
+bool can_coalesce_dimensions(const std::vector& shape,
+ const std::vector>& all_strides,
+ size_t dim1,
+ size_t dim2) {
+ for (const auto& strides : all_strides) {
+ if (!::can_coalesce_dimensions(shape, strides, dim1, dim2)) {
+ return false;
+ }
}
- static void copy(const int32_t *src_data, double *dst_data, size_t num_elements) noexcept {
- vDSP_vflt32D(src_data, 1, dst_data, 1, num_elements);
- }
-};
+ return true;
+}
-// Int32 -> Float
-template<>
-struct VDSPCopier {
- static bool supported() noexcept {
- return true;
- }
-
- static void copy(const int32_t *src_data, float *dst_data, size_t num_elements) noexcept {
- vDSP_vflt32(src_data, 1, dst_data, 1, num_elements);
+void update_strides(std::vector>& all_strides,
+ size_t dim1,
+ size_t dim2) {
+ for (auto& strides : all_strides) {
+ strides[dim1] = strides[dim2];
}
-};
+}
-template
-struct MultiArrayVDSPCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!VDSPCopier::supported()) {
- return false;
- }
-
- if (!src.layout.is_packed() || !dst.layout.is_packed()) {
- return false;
+std::vector coalesce_dimensions(std::vector layouts) {
+ if (layouts.size() == 0) {
+ return {};
+ }
+
+ std::vector shape = layouts.back().shape();
+ // reverse shape.
+ std::reverse(shape.begin(), shape.end());
+ std::vector> all_strides;
+ // reverse strides.
+ all_strides.reserve(layouts.size());
+ std::transform(layouts.begin(), layouts.end(), std::back_inserter(all_strides), [](const MultiArray::MemoryLayout& layout) {
+ auto strides = layout.strides();
+ std::reverse(strides.begin(), strides.end());
+ return strides;
+ });
+ size_t rank = layouts[0].rank();
+ size_t prev_dim = 0;
+ for (size_t dim = 1; dim < rank; ++dim) {
+ if (::can_coalesce_dimensions(shape, all_strides, prev_dim, dim)) {
+ if (shape[prev_dim] == 1) {
+ ::update_strides(all_strides, prev_dim, dim);
+ }
+ shape[prev_dim] *= shape[dim];
+ } else {
+ ++prev_dim;
+ if (prev_dim != dim) {
+ ::update_strides(all_strides, prev_dim, dim);
+ shape[prev_dim] = shape[dim];
+ }
}
-
- VDSPCopier::copy(src.data, dst.data, src.layout.get_num_elements());
- return true;
}
-};
-
-#pragma mark - MemCopy
-
-template
-struct MemCopier {
- static bool supported() noexcept {
- return false;
+
+ if (rank == prev_dim + 1) {
+ return layouts;
}
- static void copy(const T1 *src_data, T2 *dst_data, size_t num_elements) noexcept {}
-};
-
-template
-struct MemCopier {
- static bool supported() noexcept {
- return true;
+ shape.resize(prev_dim + 1);
+ for (auto& strides : all_strides) {
+ strides.resize(prev_dim + 1);
}
- static void copy(const T *src_data, T *dst_data, size_t num_elements) noexcept {
- std::memcpy(dst_data, src_data, num_elements);
+ std::vector result;
+ result.reserve(layouts.size());
+ std::reverse(shape.begin(), shape.end());
+ for (size_t i = 0; i < layouts.size(); ++i) {
+ std::reverse(all_strides[i].begin(), all_strides[i].end());
+ result.emplace_back(layouts[i].dataType(), shape, std::move(all_strides[i]));
}
+
+ return result;
+}
+
+enum class Direction : uint8_t {
+ Forward = 0,
+ Backward
};
-template
-struct MultiArrayMemCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (!MemCopier::supported()) {
- return false;
- }
-
- if (!src.layout.is_packed() || !dst.layout.is_packed()) {
- return false;
+void set_data_pointers(std::vector& data_pointers,
+ ssize_t index,
+ size_t dim,
+ Direction direction,
+ const std::vector& layouts) {
+ for (size_t i = 0; i < layouts.size(); ++i) {
+ const auto& layout = layouts[i];
+ const ssize_t stride = layout.strides()[dim];
+ const size_t num_bytes = layout.num_bytes();
+ ssize_t offset = 0;
+ switch (direction) {
+ case Direction::Forward: {
+ offset = stride * index * num_bytes;
+ break;
+ }
+ case Direction::Backward: {
+ offset = - stride * index * num_bytes;
+ break;
+ }
}
-
- MemCopier::copy(src.data, dst.data, src.layout.get_num_elements());
- return true;
+ data_pointers[i] = (void *)(static_cast(data_pointers[i]) + offset);
}
-};
+}
+
+void increment_data_pointers(std::vector& data_pointers,
+ size_t index,
+ size_t dim,
+ const std::vector& layouts) {
+ set_data_pointers(data_pointers, index, dim, Direction::Forward, layouts);
+}
-#pragma mark - MultiArrayIterator
-/// TODO - remove recursion and coalesce contiguous dimensions.
-template
-struct MultiArrayIterator {
- explicit MultiArrayIterator(TypedMultiArray& array1, TypedMultiArray& array2)
- :array1(array1), array2(array2)
+void decrement_data_pointers(std::vector& data_pointers,
+ size_t index,
+ size_t dim,
+ const std::vector& layouts) {
+ set_data_pointers(data_pointers, index, dim, Direction::Backward, layouts);
+}
+
+class MultiArrayIterator final {
+public:
+ explicit MultiArrayIterator(const std::vector& arrays)
+ :datas_(get_datas(arrays)),
+ layouts_(coalesce_dimensions(get_layouts(arrays)))
{}
+private:
template
- void loop(FN&& fn, T1 *data1, T2 *data2, size_t dim) {
- const size_t index = dim - 1;
- const auto& layout1 = array1.layout;
- const auto& layout2 = array2.layout;
- const ssize_t stride1 = layout1.strides()[index];
- const ssize_t stride2 = layout2.strides()[index];
- const size_t bound = layout1.shape()[index];
-
- if (index == 0) {
- for (size_t i = 0; i < bound; i++) {
- if (fn(data1 + stride1 * i, data2 + stride2 * i)) {
- break;
+ void exec(FN&& fn, const std::vector& layouts, std::vector datas, size_t n) {
+ const auto& layout = layouts.back();
+ // Avoid function call for rank <= 2.
+ switch (n) {
+ case 0: {
+ break;
+ }
+ case 1: {
+ for (size_t i = 0; i < layout.shape()[0]; ++i) {
+ ::increment_data_pointers(datas, i, 0, layouts);
+ fn(datas);
+ ::decrement_data_pointers(datas, i, 0, layouts);
+ }
+ break;
+ }
+ case 2: {
+ for (size_t i = 0; i < layout.shape()[1]; ++i) {
+ ::increment_data_pointers(datas, i, 1, layouts);
+ for (size_t j = 0; j < layout.shape()[0]; ++j) {
+ ::increment_data_pointers(datas, j, 0, layouts);
+ fn(datas);
+ ::decrement_data_pointers(datas, j, 0, layouts);
+ }
+ ::decrement_data_pointers(datas, i, 1, layouts);
+ }
+
+ break;
+ }
+
+ default: {
+ const size_t bound = layouts.back().shape()[n - 1];
+ for (size_t index = 0; index < bound; ++index) {
+ ::increment_data_pointers(datas, index, n - 1, layouts);
+ exec(std::forward(fn), layouts, datas, n - 1);
+ ::decrement_data_pointers(datas, index, n - 1, layouts);
}
}
- return;
- }
-
- for (size_t i = 0; i < bound; i++) {
- loop(fn, data1 + stride1 * i, data2 + stride2 * i, dim - 1);
}
}
+public:
template
- void loop(FN&& fn) {
- loop(fn, array1.data, array2.data, array1.layout.rank());
+ void exec(FN&& fn) {
+ std::vector datas = datas_;
+ exec(fn, layouts_, datas, layouts_[0].rank());
}
- TypedMultiArray array1;
- TypedMultiArray array2;
+private:
+ std::vector datas_;
+ std::vector layouts_;
};
+/// BNNS has no double type, so we handle the conversions here.
template
-struct MultiArrayLoopingCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- auto looper = MultiArrayIterator(src, dst);
- looper.loop([](T1 *src, T2 *dst){
- *dst = static_cast(*src);
- return true;
- });
-
- return true;
- }
-};
+inline void copy_value(void *dst, const void *src) {
+ const T2 *src_ptr = static_cast(src);
+ T1 *dst_ptr = static_cast(dst);
+ *dst_ptr = static_cast(*src_ptr);
+}
-template
-struct MultiArrayCopier {
- static bool copy(TypedMultiArray& src, TypedMultiArray& dst) {
- if (src.layout.shape() != dst.layout.shape()) {
- return false;
+template
+void copy(void *dst,
+ MultiArray::DataType dst_data_type,
+ const void *src) {
+ switch (dst_data_type) {
+ case MultiArray::DataType::Bool: {
+ ::copy_value(dst, src);
+ break;
+ }
+
+ case MultiArray::DataType::Byte: {
+ ::copy_value(dst, src);
+ break;
+ }
+
+ case MultiArray::DataType::Char: {
+ ::copy_value(dst, src);
+ break;
+ }
+
+ case MultiArray::DataType::Short: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (src.layout.get_num_elements() == 0) {
- return true;
+
+ case MultiArray::DataType::Int32: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (MultiArrayBNNSCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Int64: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (MultiArrayVImageCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Float16: {
+ ::copy_value<_Float16, T>(dst, src);
+ break;
}
-
- if (MultiArrayVDSPCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Float32: {
+ ::copy_value(dst, src);
+ break;
}
-
- if (MultiArrayMemCopier::copy(src, dst)) {
- return true;
+
+ case MultiArray::DataType::Float64: {
+ ::copy_value(dst, src);
+ break;
}
-
- return MultiArrayLoopingCopier::copy(src, dst);
}
-};
+}
-template
-bool copy(TypedMultiArray& src, MultiArray& dst) {
- const auto& dstLayout = dst.layout();
- switch (dstLayout.dataType()) {
- case MultiArray::DataType::Int: {
- auto dst_array = TypedMultiArray(reinterpret_cast(dst.data()), dstLayout);
- return MultiArrayCopier::copy(src, dst_array);
+void copy(void *dst,
+ MultiArray::DataType dst_data_type,
+ const void *src,
+ MultiArray::DataType src_data_type) {
+ switch (src_data_type) {
+ case MultiArray::DataType::Bool: {
+ ::copy