From 846129f2ea355037dc9b13b4f1ac49ac0630dd28 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 21:18:56 -0700 Subject: [PATCH 01/21] platform layer for windows and linux compatibility --- backends/cuda/runtime/cuda_backend.cpp | 39 +++++++----- backends/cuda/runtime/platform/platform.h | 76 +++++++++++++++++++++++ 2 files changed, 98 insertions(+), 17 deletions(-) create mode 100644 backends/cuda/runtime/platform/platform.h diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp index 3fcd25a3d1d..5c09973ac5f 100644 --- a/backends/cuda/runtime/cuda_backend.cpp +++ b/backends/cuda/runtime/cuda_backend.cpp @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -23,17 +22,23 @@ // Include our shim layer headers #include #include +#include #include #include namespace executorch::backends::cuda { -#define LOAD_SYMBOL(handle, member, name, so_handle) \ - do { \ - handle->member = reinterpret_cast(dlsym(so_handle, #name)); \ - ET_CHECK_OR_RETURN_ERROR( \ - handle->member != nullptr, AccessFailed, "Failed to load " #name); \ - } while (0) +#define LOAD_SYMBOL(handle, member, name, so_handle) \ + \ + do { \ + auto symbol_res = get_function(so_handle, #name); \ + \ + if (!symbol_res.ok()) { \ + return symbol_res.error(); \ + } \ + handle->member = reinterpret_cast(symbol_res.get()); \ + } \ + while (0) using namespace std; using namespace aoti; @@ -144,24 +149,23 @@ class ET_EXPERIMENTAL CudaBackend final // Finish writing the file to disk outfile.close(); - // Load the ELF using dlopen - void* so_handle = dlopen(so_path.c_str(), RTLD_LAZY | RTLD_LOCAL); - ET_CHECK_OR_RETURN_ERROR( - so_handle != nullptr, - AccessFailed, - "Failed to load shared library: %s", - dlerror()); + // Load the lib + Result lib_handle_res = load_library(so_path); + if (!lib_handle_res.ok()) { + return lib_handle_res.error(); + } + void* lib_handle = lib_handle_res.get(); processed->Free(); // Create handle and load function pointers into it AOTIDelegateHandle* handle = new AOTIDelegateHandle(); - handle->so_handle = so_handle; + handle->so_handle = lib_handle; handle->so_path = so_path.string(); // Load function pointers specific to this handle's shared library ET_CHECK_OK_OR_RETURN_ERROR( - load_function_pointers_into_handle(so_handle, handle)); + load_function_pointers_into_handle(lib_handle, handle)); AOTInductorModelContainerHandle container_handle = nullptr; @@ -332,8 +336,9 @@ class ET_EXPERIMENTAL CudaBackend final // AOTInductorModelContainerDelete(handle->container_handle); // Now close the shared library + auto err = Error::Ok; if (handle->so_handle != nullptr) { - dlclose(handle->so_handle); + err = close_library(handle->so_handle); } // Remove the temporary shared library file diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h new file mode 100644 index 00000000000..526b46cb261 --- /dev/null +++ b/backends/cuda/runtime/platform/platform.h @@ -0,0 +1,76 @@ + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace executorch { +namespace backends { +namespace cuda { + +Result load_library(const std::string& path) { +#ifdef _WIN32 + auto lib_handle = LoadLibrary(path.c_str()); + if (hModule == NULL) { + ET_LOG(Error, "Failed to load %s with error: %lu", path.c_str(), GetLastError()); + return Error::AccessFailed; + } + +#else + void* lib_handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (so_handle == nullptr) { + ET_LOG(Error, "Failed to load %s with error: %s", path.c_str(), dlerror()); + return Error::AccessFailed; + } +#endif + return (void*) lib_handle; +} + +Error close_library(void* lib_handle) { +#ifdef _WIN32 + if (!FreeLibrary((HModule)lib_handle)) { + printf("FreeLibrary failed with error %lu\n", GetLastError()); + return Error::Internal + } +#else + if (dlclose(lib_handle) != 0) { + ET_LOG(Error, "dlclose failed: %s\n", dlerror()); + return Error::Internal; + } +#endif + return Error::Ok; +} + +Result get_function(void* lib_handle, const std::string& fn_name) { +#ifdef _WIN32 + auto fn = GetProcAddress((HModule)lib_handle, fn_name.c_str()); + if (!fn) { + ET_LOG(Error, "Failed loading symbol %s with error %lu\n", fn_name.c_str(), GetLastError()); + return Error::Internal; + } +#else + auto fn = dlsym(lib_handle, fn_name.c_str()); + if (fn == nullptr) { + ET_LOG(Error, "Failed loading symbol %s with error %s\n", fn_name.c_str(), dlerror()); + return Error::Internal; + } +#endif + + return (void*)fn; // This I think is technically ub on windows. We should probably explicitly pack the bytes. +} + +} // cuda +} // backends +} // executorch From f3568971cc5f70502257bf63331e80be7a68343f Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 21:21:54 -0700 Subject: [PATCH 02/21] lint --- backends/cuda/runtime/cuda_backend.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp index 5c09973ac5f..d4788072094 100644 --- a/backends/cuda/runtime/cuda_backend.cpp +++ b/backends/cuda/runtime/cuda_backend.cpp @@ -29,10 +29,8 @@ namespace executorch::backends::cuda { #define LOAD_SYMBOL(handle, member, name, so_handle) \ - \ do { \ auto symbol_res = get_function(so_handle, #name); \ - \ if (!symbol_res.ok()) { \ return symbol_res.error(); \ } \ From 3e9c66b9b57341add183904bcce1e51b5b294fa4 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 21:32:11 -0700 Subject: [PATCH 03/21] abstract process id --- backends/cuda/runtime/cuda_backend.cpp | 6 +-- backends/cuda/runtime/platform/platform.h | 60 ++++++++++++++++------- 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp index d4788072094..c391297629d 100644 --- a/backends/cuda/runtime/cuda_backend.cpp +++ b/backends/cuda/runtime/cuda_backend.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -35,8 +34,7 @@ namespace executorch::backends::cuda { return symbol_res.error(); \ } \ handle->member = reinterpret_cast(symbol_res.get()); \ - } \ - while (0) + } while (0) using namespace std; using namespace aoti; @@ -125,7 +123,7 @@ class ET_EXPERIMENTAL CudaBackend final // Generate dynamic temporary file path filesystem::path temp_dir = filesystem::temp_directory_path(); filesystem::path so_path = - temp_dir / (so_blob_key + to_string(getpid()) + ".so"); + temp_dir / (so_blob_key + to_string(get_process_id()) + ".so"); // Create a temporary file ofstream outfile(so_path.c_str(), ios::binary); diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index 526b46cb261..aadd6d7173d 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -7,13 +7,14 @@ * LICENSE file in the root directory of this source tree. */ -#include #include +#include #ifdef _WIN32 #include #else #include +#include #endif namespace executorch { @@ -23,31 +24,35 @@ namespace cuda { Result load_library(const std::string& path) { #ifdef _WIN32 auto lib_handle = LoadLibrary(path.c_str()); - if (hModule == NULL) { - ET_LOG(Error, "Failed to load %s with error: %lu", path.c_str(), GetLastError()); - return Error::AccessFailed; + if (lib_handle == NULL) { + ET_LOG( + Error, + "Failed to load %s with error: %lu", + path.c_str(), + GetLastError()); + return Error::AccessFailed; } #else void* lib_handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); - if (so_handle == nullptr) { + if (lib_handle == nullptr) { ET_LOG(Error, "Failed to load %s with error: %s", path.c_str(), dlerror()); return Error::AccessFailed; } #endif - return (void*) lib_handle; + return (void*)lib_handle; } Error close_library(void* lib_handle) { #ifdef _WIN32 - if (!FreeLibrary((HModule)lib_handle)) { - printf("FreeLibrary failed with error %lu\n", GetLastError()); - return Error::Internal - } + if (!FreeLibrary((HMODULE)lib_handle)) { + printf("FreeLibrary failed with error %lu\n", GetLastError()); + return Error::Internal; + } #else if (dlclose(lib_handle) != 0) { - ET_LOG(Error, "dlclose failed: %s\n", dlerror()); - return Error::Internal; + ET_LOG(Error, "dlclose failed: %s\n", dlerror()); + return Error::Internal; } #endif return Error::Ok; @@ -55,20 +60,39 @@ Error close_library(void* lib_handle) { Result get_function(void* lib_handle, const std::string& fn_name) { #ifdef _WIN32 - auto fn = GetProcAddress((HModule)lib_handle, fn_name.c_str()); + auto fn = GetProcAddress((HMODULE)lib_handle, fn_name.c_str()); if (!fn) { - ET_LOG(Error, "Failed loading symbol %s with error %lu\n", fn_name.c_str(), GetLastError()); - return Error::Internal; + ET_LOG( + Error, + "Failed loading symbol %s with error %lu\n", + fn_name.c_str(), + GetLastError()); + return Error::Internal; } #else auto fn = dlsym(lib_handle, fn_name.c_str()); if (fn == nullptr) { - ET_LOG(Error, "Failed loading symbol %s with error %s\n", fn_name.c_str(), dlerror()); - return Error::Internal; + ET_LOG( + Error, + "Failed loading symbol %s with error %s\n", + fn_name.c_str(), + dlerror()); + return Error::Internal; } #endif - return (void*)fn; // This I think is technically ub on windows. We should probably explicitly pack the bytes. + return (void*)fn; // This I think is technically ub on windows. We should + // probably explicitly pack the bytes. +} + +int32_t get_process_id(void* lib_handle) { +#ifdef _WIN32 + return GetCurrentProcessId(); +#else + return getpid(); +#endif +} + } } // cuda From d39a36eaa4157d76274f77004a10fca978746805 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:07:12 -0700 Subject: [PATCH 04/21] missing import --- backends/aoti/utils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/aoti/utils.h b/backends/aoti/utils.h index b24fcaac864..80abe663fa2 100644 --- a/backends/aoti/utils.h +++ b/backends/aoti/utils.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace executorch { namespace backends { From caf8495c337f06b2b614d3ea9c2810f4a97bfeaf Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:12:36 -0700 Subject: [PATCH 05/21] missing import and namespacing --- backends/cuda/runtime/platform/platform.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index aadd6d7173d..9391c198535 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -8,6 +8,7 @@ */ #include +#include #include #ifdef _WIN32 @@ -21,7 +22,7 @@ namespace executorch { namespace backends { namespace cuda { -Result load_library(const std::string& path) { +executorch::runtime::Result load_library(const std::string& path) { #ifdef _WIN32 auto lib_handle = LoadLibrary(path.c_str()); if (lib_handle == NULL) { @@ -43,7 +44,7 @@ Result load_library(const std::string& path) { return (void*)lib_handle; } -Error close_library(void* lib_handle) { +executorch::runtime::Error close_library(void* lib_handle) { #ifdef _WIN32 if (!FreeLibrary((HMODULE)lib_handle)) { printf("FreeLibrary failed with error %lu\n", GetLastError()); @@ -58,7 +59,7 @@ Error close_library(void* lib_handle) { return Error::Ok; } -Result get_function(void* lib_handle, const std::string& fn_name) { +executorch::runtime::Result get_function(void* lib_handle, const std::string& fn_name) { #ifdef _WIN32 auto fn = GetProcAddress((HMODULE)lib_handle, fn_name.c_str()); if (!fn) { From 2fa126aaf050f4ae62c2fc732185e2e16a315f45 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:15:21 -0700 Subject: [PATCH 06/21] more namespace errors --- backends/cuda/runtime/platform/platform.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index 9391c198535..898110cf84f 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -38,7 +38,7 @@ executorch::runtime::Result load_library(const std::string& path) { void* lib_handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); if (lib_handle == nullptr) { ET_LOG(Error, "Failed to load %s with error: %s", path.c_str(), dlerror()); - return Error::AccessFailed; + return executorch::runtime::Error::AccessFailed; } #endif return (void*)lib_handle; @@ -48,7 +48,7 @@ executorch::runtime::Error close_library(void* lib_handle) { #ifdef _WIN32 if (!FreeLibrary((HMODULE)lib_handle)) { printf("FreeLibrary failed with error %lu\n", GetLastError()); - return Error::Internal; + return executorch::runtime::Error::Internal; } #else if (dlclose(lib_handle) != 0) { @@ -56,7 +56,7 @@ executorch::runtime::Error close_library(void* lib_handle) { return Error::Internal; } #endif - return Error::Ok; + return executorch::runtime::Error::Ok; } executorch::runtime::Result get_function(void* lib_handle, const std::string& fn_name) { @@ -68,7 +68,7 @@ executorch::runtime::Result get_function(void* lib_handle, const std::str "Failed loading symbol %s with error %lu\n", fn_name.c_str(), GetLastError()); - return Error::Internal; + return executorch::runtime::Error::Internal; } #else auto fn = dlsym(lib_handle, fn_name.c_str()); From 36b67105f16258207a9a6270c8db58607ff47f5c Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:16:14 -0700 Subject: [PATCH 07/21] namespacing hell --- backends/cuda/runtime/platform/platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index 898110cf84f..355c6fcd7eb 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -31,7 +31,7 @@ executorch::runtime::Result load_library(const std::string& path) { "Failed to load %s with error: %lu", path.c_str(), GetLastError()); - return Error::AccessFailed; + return executorch::runtime::Error::AccessFailed; } #else From 4c0215c1f0d0b1ca15d918e348305376c003132a Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:18:19 -0700 Subject: [PATCH 08/21] erroneous bracket --- backends/cuda/runtime/platform/platform.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index 355c6fcd7eb..a9d574cc365 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -13,7 +13,7 @@ #ifdef _WIN32 #include -#else +#else // Posix #include #include #endif @@ -94,8 +94,6 @@ int32_t get_process_id(void* lib_handle) { #endif } -} - } // cuda } // backends } // executorch From a685328b3d3ddceb49ca004f61891072890d71e9 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:25:51 -0700 Subject: [PATCH 09/21] no std::string for path --- backends/cuda/runtime/platform/platform.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index a9d574cc365..a93db08dcee 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -22,22 +22,22 @@ namespace executorch { namespace backends { namespace cuda { -executorch::runtime::Result load_library(const std::string& path) { +executorch::runtime::Result load_library(const char* path) { #ifdef _WIN32 - auto lib_handle = LoadLibrary(path.c_str()); + auto lib_handle = LoadLibrary(path); if (lib_handle == NULL) { ET_LOG( Error, "Failed to load %s with error: %lu", - path.c_str(), + path, GetLastError()); return executorch::runtime::Error::AccessFailed; } #else - void* lib_handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); + void* lib_handle = dlopen(path, RTLD_LAZY | RTLD_LOCAL); if (lib_handle == nullptr) { - ET_LOG(Error, "Failed to load %s with error: %s", path.c_str(), dlerror()); + ET_LOG(Error, "Failed to load %s with error: %s", path, dlerror()); return executorch::runtime::Error::AccessFailed; } #endif @@ -59,7 +59,9 @@ executorch::runtime::Error close_library(void* lib_handle) { return executorch::runtime::Error::Ok; } -executorch::runtime::Result get_function(void* lib_handle, const std::string& fn_name) { +executorch::runtime::Result get_function( + void* lib_handle, + const std::string& fn_name) { #ifdef _WIN32 auto fn = GetProcAddress((HMODULE)lib_handle, fn_name.c_str()); if (!fn) { @@ -86,7 +88,7 @@ executorch::runtime::Result get_function(void* lib_handle, const std::str // probably explicitly pack the bytes. } -int32_t get_process_id(void* lib_handle) { +int32_t get_process_id() { #ifdef _WIN32 return GetCurrentProcessId(); #else @@ -94,6 +96,6 @@ int32_t get_process_id(void* lib_handle) { #endif } -} // cuda -} // backends -} // executorch +} // namespace cuda +} // namespace backends +} // namespace executorch From 2469b441e340358e9f1fd7118dbc80d49ddfed74 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:35:27 -0700 Subject: [PATCH 10/21] remove posix_memalign --- backends/cuda/runtime/cuda_backend.cpp | 2 +- backends/cuda/runtime/platform/platform.h | 24 ++++++++++++++++++----- backends/cuda/runtime/shims/memory.cpp | 11 +++-------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp index c391297629d..b677ef459fe 100644 --- a/backends/cuda/runtime/cuda_backend.cpp +++ b/backends/cuda/runtime/cuda_backend.cpp @@ -146,7 +146,7 @@ class ET_EXPERIMENTAL CudaBackend final outfile.close(); // Load the lib - Result lib_handle_res = load_library(so_path); + Result lib_handle_res = load_library(so_path.c_str()); if (!lib_handle_res.ok()) { return lib_handle_res.error(); } diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index a93db08dcee..8e4890677a7 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -12,10 +12,12 @@ #include #ifdef _WIN32 +#include #include #else // Posix #include #include +#include #endif namespace executorch { @@ -26,11 +28,7 @@ executorch::runtime::Result load_library(const char* path) { #ifdef _WIN32 auto lib_handle = LoadLibrary(path); if (lib_handle == NULL) { - ET_LOG( - Error, - "Failed to load %s with error: %lu", - path, - GetLastError()); + ET_LOG(Error, "Failed to load %s with error: %lu", path, GetLastError()); return executorch::runtime::Error::AccessFailed; } @@ -96,6 +94,22 @@ int32_t get_process_id() { #endif } +void* aligned_alloc(size_t alignment, size_t size) { +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + return std::aligned_alloc(alignment, size); +#endif +} + +void aligned_free(void* ptr) { +#ifdef _WIN32 + _aligned_free(ptr); +#else + std::free(ptr); +#endif +} + } // namespace cuda } // namespace backends } // namespace executorch diff --git a/backends/cuda/runtime/shims/memory.cpp b/backends/cuda/runtime/shims/memory.cpp index fe8ccf07281..811db422475 100644 --- a/backends/cuda/runtime/shims/memory.cpp +++ b/backends/cuda/runtime/shims/memory.cpp @@ -13,7 +13,6 @@ #include #include #include -#include // For posix_memalign #include #include #include @@ -230,15 +229,11 @@ AOTITorchError aoti_torch_empty_strided( cudaMallocAsync(&ptr, static_cast(nbytes), cudaStreamDefault)); } else if (device_type == static_cast(SupportedDevices::CPU)) { // Ensure 16-byte alignment for CPU memory to match CUDA requirements - int result = posix_memalign(&ptr, 16, nbytes); - ET_CHECK_OR_RETURN_ERROR( - result == 0, - MemoryAllocationFailed, - "Failed to allocate aligned CPU memory"); + &ptr = aligned_alloc(16, nbytes); ET_CHECK_OR_RETURN_ERROR( ptr != nullptr, MemoryAllocationFailed, - "Failed to call posix_memalign"); + "Failed to allocate aligned CPU memory"); } else { ET_CHECK_OR_RETURN_ERROR( false, @@ -339,7 +334,7 @@ AOTITorchError aoti_torch_delete_tensor_object(Tensor* tensor) { Internal, "Expected host memory but got managed!") // This is CPU memory - free immediately - free(data_ptr); + aligned_free(data_ptr); data_ptr = nullptr; } From 8610ba36206855b6b12ac53be6ff8bc909137b1e Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:44:35 -0700 Subject: [PATCH 11/21] more path iteration --- backends/cuda/runtime/cuda_backend.cpp | 4 ++-- backends/cuda/runtime/platform/platform.h | 10 ++++++---- backends/cuda/runtime/shims/memory.cpp | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp index b677ef459fe..e61b03ee8e6 100644 --- a/backends/cuda/runtime/cuda_backend.cpp +++ b/backends/cuda/runtime/cuda_backend.cpp @@ -126,7 +126,7 @@ class ET_EXPERIMENTAL CudaBackend final temp_dir / (so_blob_key + to_string(get_process_id()) + ".so"); // Create a temporary file - ofstream outfile(so_path.c_str(), ios::binary); + ofstream outfile(so_path, ios::binary); // Write the ELF buffer to the temporary file ET_LOG( @@ -146,7 +146,7 @@ class ET_EXPERIMENTAL CudaBackend final outfile.close(); // Load the lib - Result lib_handle_res = load_library(so_path.c_str()); + Result lib_handle_res = load_library(so_path); if (!lib_handle_res.ok()) { return lib_handle_res.error(); } diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index 8e4890677a7..a5e9ce333a4 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -24,18 +24,20 @@ namespace executorch { namespace backends { namespace cuda { -executorch::runtime::Result load_library(const char* path) { +executorch::runtime::Result load_library( + const std::filesystem::path& path) { #ifdef _WIN32 - auto lib_handle = LoadLibrary(path); + std::string utf8 = p.u8string(); + auto lib_handle = LoadLibrary(utf8.c_str()); if (lib_handle == NULL) { - ET_LOG(Error, "Failed to load %s with error: %lu", path, GetLastError()); + ET_LOG(Error, "Failed to load %s with error: %lu", utf8, GetLastError()); return executorch::runtime::Error::AccessFailed; } #else void* lib_handle = dlopen(path, RTLD_LAZY | RTLD_LOCAL); if (lib_handle == nullptr) { - ET_LOG(Error, "Failed to load %s with error: %s", path, dlerror()); + ET_LOG(Error, "Failed to load %s with error: %s", path.c_str(), dlerror()); return executorch::runtime::Error::AccessFailed; } #endif diff --git a/backends/cuda/runtime/shims/memory.cpp b/backends/cuda/runtime/shims/memory.cpp index 811db422475..5fbb9e92f67 100644 --- a/backends/cuda/runtime/shims/memory.cpp +++ b/backends/cuda/runtime/shims/memory.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include From c30c33b3c486d6047dcf2ab509189c562a2778fc Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:50:07 -0700 Subject: [PATCH 12/21] missing include --- backends/cuda/runtime/platform/platform.h | 1 + 1 file changed, 1 insertion(+) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index a5e9ce333a4..fc01beecb34 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef _WIN32 #include From 917361a1ebf8588b2fb4d52772b2e26f9bab22a2 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 22:51:16 -0700 Subject: [PATCH 13/21] drop reference --- backends/cuda/runtime/platform/platform.h | 2 +- backends/cuda/runtime/shims/memory.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index fc01beecb34..9c05afa5aac 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -28,7 +28,7 @@ namespace cuda { executorch::runtime::Result load_library( const std::filesystem::path& path) { #ifdef _WIN32 - std::string utf8 = p.u8string(); + std::string utf8 = path.u8string(); auto lib_handle = LoadLibrary(utf8.c_str()); if (lib_handle == NULL) { ET_LOG(Error, "Failed to load %s with error: %lu", utf8, GetLastError()); diff --git a/backends/cuda/runtime/shims/memory.cpp b/backends/cuda/runtime/shims/memory.cpp index 5fbb9e92f67..5d30d3124d9 100644 --- a/backends/cuda/runtime/shims/memory.cpp +++ b/backends/cuda/runtime/shims/memory.cpp @@ -230,7 +230,7 @@ AOTITorchError aoti_torch_empty_strided( cudaMallocAsync(&ptr, static_cast(nbytes), cudaStreamDefault)); } else if (device_type == static_cast(SupportedDevices::CPU)) { // Ensure 16-byte alignment for CPU memory to match CUDA requirements - &ptr = aligned_alloc(16, nbytes); + ptr = aligned_alloc(16, nbytes); ET_CHECK_OR_RETURN_ERROR( ptr != nullptr, MemoryAllocationFailed, From 45039a469b1dfbfe61abafeb035525f358c598bd Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Wed, 15 Oct 2025 23:24:05 -0700 Subject: [PATCH 14/21] lint --- backends/cuda/runtime/platform/platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index 9c05afa5aac..e29bed36cf8 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -9,8 +9,8 @@ #include #include -#include #include +#include #ifdef _WIN32 #include From 25ce94e56f0f8ddc3d98ff79c9ae3757d0765e92 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 16 Oct 2025 10:00:26 -0700 Subject: [PATCH 15/21] linux fix --- backends/cuda/runtime/platform/platform.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index e29bed36cf8..b479d69d4ef 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -36,7 +36,7 @@ executorch::runtime::Result load_library( } #else - void* lib_handle = dlopen(path, RTLD_LAZY | RTLD_LOCAL); + void* lib_handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); if (lib_handle == nullptr) { ET_LOG(Error, "Failed to load %s with error: %s", path.c_str(), dlerror()); return executorch::runtime::Error::AccessFailed; @@ -54,7 +54,7 @@ executorch::runtime::Error close_library(void* lib_handle) { #else if (dlclose(lib_handle) != 0) { ET_LOG(Error, "dlclose failed: %s\n", dlerror()); - return Error::Internal; + return executorch::runtime::Error::Internal; } #endif return executorch::runtime::Error::Ok; From 7d701a36cf09474c6a44d9179aa05322ea574519 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 16 Oct 2025 10:39:22 -0700 Subject: [PATCH 16/21] more linux fix --- backends/cuda/runtime/platform/platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index b479d69d4ef..5bb91a67879 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -81,7 +81,7 @@ executorch::runtime::Result get_function( "Failed loading symbol %s with error %s\n", fn_name.c_str(), dlerror()); - return Error::Internal; + return executorch::runtime::Error::Internal; } #endif From 7af462377893aa64f95bb97a80ddd30ab2c8b2b1 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 16 Oct 2025 11:45:11 -0700 Subject: [PATCH 17/21] forgot pragma once --- backends/cuda/runtime/platform/platform.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index 5bb91a67879..e6edec8bfe6 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -7,6 +7,8 @@ * LICENSE file in the root directory of this source tree. */ +#pragma once + #include #include #include From 362a5c0b1d506a8c1d2db48b843aeb6ee110d58f Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 16 Oct 2025 12:46:53 -0700 Subject: [PATCH 18/21] ok give up on header only --- backends/cuda/CMakeLists.txt | 1 + backends/cuda/runtime/platform/platform.cpp | 120 ++++++++++++++++++++ backends/cuda/runtime/platform/platform.h | 94 +-------------- extension/module/module.cpp | 75 +++++++++--- 4 files changed, 187 insertions(+), 103 deletions(-) create mode 100644 backends/cuda/runtime/platform/platform.cpp diff --git a/backends/cuda/CMakeLists.txt b/backends/cuda/CMakeLists.txt index 8285fc0d582..d790051eb0c 100644 --- a/backends/cuda/CMakeLists.txt +++ b/backends/cuda/CMakeLists.txt @@ -39,6 +39,7 @@ set(_aoti_cuda_sources runtime/cuda_backend.cpp runtime/shims/memory.cpp runtime/shims/tensor_attribute.cpp runtime/guard.cpp runtime/shims/cuda_guard.cpp runtime/shims/int4mm.cu + runtime/platform/platform.cpp ) add_library(aoti_cuda STATIC ${_aoti_cuda_sources}) target_include_directories( diff --git a/backends/cuda/runtime/platform/platform.cpp b/backends/cuda/runtime/platform/platform.cpp new file mode 100644 index 00000000000..14a3ff2081f --- /dev/null +++ b/backends/cuda/runtime/platform/platform.cpp @@ -0,0 +1,120 @@ + +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#else // Posix +#include +#include +#include +#endif + +namespace executorch { +namespace backends { +namespace cuda { + +executorch::runtime::Result load_library( + const std::filesystem::path& path) { +#ifdef _WIN32 + std::string utf8 = path.u8string(); + auto lib_handle = LoadLibrary(utf8.c_str()); + if (lib_handle == NULL) { + ET_LOG(Error, "Failed to load %s with error: %lu", utf8.c_str(), GetLastError()); + return executorch::runtime::Error::AccessFailed; + } + +#else + std::string path_str = path.string(); + void* lib_handle = dlopen(path_str.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (lib_handle == nullptr) { + ET_LOG(Error, "Failed to load %s with error: %s", path_str.c_str(), dlerror()); + return executorch::runtime::Error::AccessFailed; + } +#endif + return (void*)lib_handle; +} + +executorch::runtime::Error close_library(void* lib_handle) { +#ifdef _WIN32 + if (!FreeLibrary((HMODULE)lib_handle)) { + printf("FreeLibrary failed with error %lu\n", GetLastError()); + return executorch::runtime::Error::Internal; + } +#else + if (dlclose(lib_handle) != 0) { + ET_LOG(Error, "dlclose failed: %s\n", dlerror()); + return executorch::runtime::Error::Internal; + } +#endif + return executorch::runtime::Error::Ok; +} + +executorch::runtime::Result get_function( + void* lib_handle, + const std::string& fn_name) { +#ifdef _WIN32 + auto fn = GetProcAddress((HMODULE)lib_handle, fn_name.c_str()); + if (!fn) { + ET_LOG( + Error, + "Failed loading symbol %s with error %lu\n", + fn_name.c_str(), + GetLastError()); + return executorch::runtime::Error::Internal; + } +#else + auto fn = dlsym(lib_handle, fn_name.c_str()); + if (fn == nullptr) { + ET_LOG( + Error, + "Failed loading symbol %s with error %s\n", + fn_name.c_str(), + dlerror()); + return executorch::runtime::Error::Internal; + } +#endif + + return (void*)fn; // This I think is technically ub on windows. We should + // probably explicitly pack the bytes. +} + +int32_t get_process_id() { +#ifdef _WIN32 + return GetCurrentProcessId(); +#else + return getpid(); +#endif +} + +void* aligned_alloc(size_t alignment, size_t size) { +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + return std::aligned_alloc(alignment, size); +#endif +} + +void aligned_free(void* ptr) { +#ifdef _WIN32 + _aligned_free(ptr); +#else + std::free(ptr); +#endif +} + +} // namespace cuda +} // namespace backends +} // namespace executorch diff --git a/backends/cuda/runtime/platform/platform.h b/backends/cuda/runtime/platform/platform.h index e6edec8bfe6..00f278ef85e 100644 --- a/backends/cuda/runtime/platform/platform.h +++ b/backends/cuda/runtime/platform/platform.h @@ -14,106 +14,24 @@ #include #include -#ifdef _WIN32 -#include -#include -#else // Posix -#include -#include -#include -#endif - namespace executorch { namespace backends { namespace cuda { executorch::runtime::Result load_library( - const std::filesystem::path& path) { -#ifdef _WIN32 - std::string utf8 = path.u8string(); - auto lib_handle = LoadLibrary(utf8.c_str()); - if (lib_handle == NULL) { - ET_LOG(Error, "Failed to load %s with error: %lu", utf8, GetLastError()); - return executorch::runtime::Error::AccessFailed; - } - -#else - void* lib_handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); - if (lib_handle == nullptr) { - ET_LOG(Error, "Failed to load %s with error: %s", path.c_str(), dlerror()); - return executorch::runtime::Error::AccessFailed; - } -#endif - return (void*)lib_handle; -} + const std::filesystem::path& path); -executorch::runtime::Error close_library(void* lib_handle) { -#ifdef _WIN32 - if (!FreeLibrary((HMODULE)lib_handle)) { - printf("FreeLibrary failed with error %lu\n", GetLastError()); - return executorch::runtime::Error::Internal; - } -#else - if (dlclose(lib_handle) != 0) { - ET_LOG(Error, "dlclose failed: %s\n", dlerror()); - return executorch::runtime::Error::Internal; - } -#endif - return executorch::runtime::Error::Ok; -} +executorch::runtime::Error close_library(void* lib_handle); executorch::runtime::Result get_function( void* lib_handle, - const std::string& fn_name) { -#ifdef _WIN32 - auto fn = GetProcAddress((HMODULE)lib_handle, fn_name.c_str()); - if (!fn) { - ET_LOG( - Error, - "Failed loading symbol %s with error %lu\n", - fn_name.c_str(), - GetLastError()); - return executorch::runtime::Error::Internal; - } -#else - auto fn = dlsym(lib_handle, fn_name.c_str()); - if (fn == nullptr) { - ET_LOG( - Error, - "Failed loading symbol %s with error %s\n", - fn_name.c_str(), - dlerror()); - return executorch::runtime::Error::Internal; - } -#endif - - return (void*)fn; // This I think is technically ub on windows. We should - // probably explicitly pack the bytes. -} + const std::string& fn_name); -int32_t get_process_id() { -#ifdef _WIN32 - return GetCurrentProcessId(); -#else - return getpid(); -#endif -} +int32_t get_process_id(); -void* aligned_alloc(size_t alignment, size_t size) { -#ifdef _WIN32 - return _aligned_malloc(size, alignment); -#else - return std::aligned_alloc(alignment, size); -#endif -} +void* aligned_alloc(size_t alignment, size_t size); -void aligned_free(void* ptr) { -#ifdef _WIN32 - _aligned_free(ptr); -#else - std::free(ptr); -#endif -} +void aligned_free(void* ptr); } // namespace cuda } // namespace backends diff --git a/extension/module/module.cpp b/extension/module/module.cpp index 9de77bcbc79..c4517c24a3b 100644 --- a/extension/module/module.cpp +++ b/extension/module/module.cpp @@ -50,19 +50,41 @@ runtime::Result> make_data_loader( std::unique_ptr data_loader; switch (mode) { case Module::LoadMode::File: - data_loader = ET_UNWRAP_UNIQUE(FileDataLoader::from(file_path.c_str())); + auto res = FileDataLoader::from(file_path.c_str()); + if (!res.ok()) { + return res.error(); + } + data_loader = std::make_unique>( + std::move(*res)); break; case Module::LoadMode::Mmap: - data_loader = ET_UNWRAP_UNIQUE(MmapDataLoader::from( - file_path.c_str(), MmapDataLoader::MlockConfig::NoMlock)); + auto res_mmap = MmapDataLoader::from( + file_path.c_str(), MmapDataLoader::MlockConfig::NoMlock); + if (!res_mmap.ok()) { + return res_mmap.error(); + } + data_loader = + std::make_unique>( + std::move(*res_mmap)); break; case Module::LoadMode::MmapUseMlock: - data_loader = ET_UNWRAP_UNIQUE(MmapDataLoader::from(file_path.c_str())); + auto res_mlock = MmapDataLoader::from(file_path.c_str()); + if (!res_mlock.ok()) { + return res_mlock.error(); + } + data_loader = + std::make_unique>( + std::move(*res_mlock)); break; case Module::LoadMode::MmapUseMlockIgnoreErrors: - data_loader = ET_UNWRAP_UNIQUE(MmapDataLoader::from( - file_path.c_str(), - MmapDataLoader::MlockConfig::UseMlockIgnoreErrors)); + auto res_mlock_ignore = MmapDataLoader::from( + file_path.c_str(), MmapDataLoader::MlockConfig::UseMlockIgnoreErrors); + if (!res_mlock_ignore.ok()) { + return res_mlock_ignore.error(); + } + data_loader = std::make_unique< + std::remove_reference_t>( + std::move(*res_mlock_ignore)); break; } return data_loader; @@ -165,8 +187,15 @@ runtime::Error Module::load(const Program::Verification verification) { if (data_map_loaders_.size() > 0) { for (auto i = 0; i < data_map_loaders_.size(); ++i) { - named_data_maps_.push_back(ET_UNWRAP_UNIQUE( - FlatTensorDataMap::load(data_map_loaders_[i].get()))); + auto res_flat_tensor = + FlatTensorDataMap::load(data_map_loaders_[i].get()); + if (!res_flat_tensor.ok()) { + return res_flat_tensor.error(); + } + named_data_maps_.push_back( + std::make_unique< + std::remove_reference_t>( + std::move(*res_flat_tensor))); } // Extract raw pointers from unique_ptrs to pass to MergedDataMap::load() @@ -175,13 +204,23 @@ runtime::Error Module::load(const Program::Verification verification) { for (const auto& data_map : named_data_maps_) { raw_data_maps.push_back(data_map.get()); } - merged_data_map_ = ET_UNWRAP_UNIQUE( - MergedDataMap::load(runtime::Span( - raw_data_maps.data(), raw_data_maps.size()))); + auto res_merged = MergedDataMap::load(runtime::Span( + raw_data_maps.data(), raw_data_maps.size())); + if (!res_merged.ok()) { + return res_merged.error(); + } + merged_data_map_ = + std::make_unique>( + std::move(*res_merged)); } + auto res_program = Program::load(data_loader_.get(), verification); + if (!res_program.ok()) { + return res_program.error(); + } auto program = - ET_UNWRAP_UNIQUE(Program::load(data_loader_.get(), verification)); + std::make_unique>( + std::move(*res_program)); program_ = std::shared_ptr( program.release(), [](Program* pointer) { delete pointer; }); } @@ -237,11 +276,17 @@ runtime::Error Module::load_method( } method_holder.memory_manager = std::make_unique( memory_allocator_.get(), planned_memory, temp_allocator_.get()); - method_holder.method = ET_UNWRAP_UNIQUE(program_->load_method( + auto res_method = program_->load_method( method_name.c_str(), method_holder.memory_manager.get(), event_tracer ? event_tracer : this->event_tracer(), - merged_data_map_.get())); + merged_data_map_.get()); + if (!res_method.ok()) { + return res_method.error(); + } + method_holder.method = + std::make_unique>( + std::move(*res_method)); methods_.emplace(method_name, std::move(method_holder)); } return runtime::Error::Ok; From 6c0a0a5d2e8708cb301ca04c671da2c241334517 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 16 Oct 2025 12:56:33 -0700 Subject: [PATCH 19/21] issue with history lead to module change being included --- extension/module/module.cpp | 75 ++++++++----------------------------- 1 file changed, 15 insertions(+), 60 deletions(-) diff --git a/extension/module/module.cpp b/extension/module/module.cpp index c4517c24a3b..9de77bcbc79 100644 --- a/extension/module/module.cpp +++ b/extension/module/module.cpp @@ -50,41 +50,19 @@ runtime::Result> make_data_loader( std::unique_ptr data_loader; switch (mode) { case Module::LoadMode::File: - auto res = FileDataLoader::from(file_path.c_str()); - if (!res.ok()) { - return res.error(); - } - data_loader = std::make_unique>( - std::move(*res)); + data_loader = ET_UNWRAP_UNIQUE(FileDataLoader::from(file_path.c_str())); break; case Module::LoadMode::Mmap: - auto res_mmap = MmapDataLoader::from( - file_path.c_str(), MmapDataLoader::MlockConfig::NoMlock); - if (!res_mmap.ok()) { - return res_mmap.error(); - } - data_loader = - std::make_unique>( - std::move(*res_mmap)); + data_loader = ET_UNWRAP_UNIQUE(MmapDataLoader::from( + file_path.c_str(), MmapDataLoader::MlockConfig::NoMlock)); break; case Module::LoadMode::MmapUseMlock: - auto res_mlock = MmapDataLoader::from(file_path.c_str()); - if (!res_mlock.ok()) { - return res_mlock.error(); - } - data_loader = - std::make_unique>( - std::move(*res_mlock)); + data_loader = ET_UNWRAP_UNIQUE(MmapDataLoader::from(file_path.c_str())); break; case Module::LoadMode::MmapUseMlockIgnoreErrors: - auto res_mlock_ignore = MmapDataLoader::from( - file_path.c_str(), MmapDataLoader::MlockConfig::UseMlockIgnoreErrors); - if (!res_mlock_ignore.ok()) { - return res_mlock_ignore.error(); - } - data_loader = std::make_unique< - std::remove_reference_t>( - std::move(*res_mlock_ignore)); + data_loader = ET_UNWRAP_UNIQUE(MmapDataLoader::from( + file_path.c_str(), + MmapDataLoader::MlockConfig::UseMlockIgnoreErrors)); break; } return data_loader; @@ -187,15 +165,8 @@ runtime::Error Module::load(const Program::Verification verification) { if (data_map_loaders_.size() > 0) { for (auto i = 0; i < data_map_loaders_.size(); ++i) { - auto res_flat_tensor = - FlatTensorDataMap::load(data_map_loaders_[i].get()); - if (!res_flat_tensor.ok()) { - return res_flat_tensor.error(); - } - named_data_maps_.push_back( - std::make_unique< - std::remove_reference_t>( - std::move(*res_flat_tensor))); + named_data_maps_.push_back(ET_UNWRAP_UNIQUE( + FlatTensorDataMap::load(data_map_loaders_[i].get()))); } // Extract raw pointers from unique_ptrs to pass to MergedDataMap::load() @@ -204,23 +175,13 @@ runtime::Error Module::load(const Program::Verification verification) { for (const auto& data_map : named_data_maps_) { raw_data_maps.push_back(data_map.get()); } - auto res_merged = MergedDataMap::load(runtime::Span( - raw_data_maps.data(), raw_data_maps.size())); - if (!res_merged.ok()) { - return res_merged.error(); - } - merged_data_map_ = - std::make_unique>( - std::move(*res_merged)); + merged_data_map_ = ET_UNWRAP_UNIQUE( + MergedDataMap::load(runtime::Span( + raw_data_maps.data(), raw_data_maps.size()))); } - auto res_program = Program::load(data_loader_.get(), verification); - if (!res_program.ok()) { - return res_program.error(); - } auto program = - std::make_unique>( - std::move(*res_program)); + ET_UNWRAP_UNIQUE(Program::load(data_loader_.get(), verification)); program_ = std::shared_ptr( program.release(), [](Program* pointer) { delete pointer; }); } @@ -276,17 +237,11 @@ runtime::Error Module::load_method( } method_holder.memory_manager = std::make_unique( memory_allocator_.get(), planned_memory, temp_allocator_.get()); - auto res_method = program_->load_method( + method_holder.method = ET_UNWRAP_UNIQUE(program_->load_method( method_name.c_str(), method_holder.memory_manager.get(), event_tracer ? event_tracer : this->event_tracer(), - merged_data_map_.get()); - if (!res_method.ok()) { - return res_method.error(); - } - method_holder.method = - std::make_unique>( - std::move(*res_method)); + merged_data_map_.get())); methods_.emplace(method_name, std::move(method_holder)); } return runtime::Error::Ok; From 20b92d57f3e865e9bfd92d09b2496e67e2426e1b Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 16 Oct 2025 14:01:02 -0700 Subject: [PATCH 20/21] lint --- backends/cuda/runtime/platform/platform.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backends/cuda/runtime/platform/platform.cpp b/backends/cuda/runtime/platform/platform.cpp index 14a3ff2081f..5264dcbd03a 100644 --- a/backends/cuda/runtime/platform/platform.cpp +++ b/backends/cuda/runtime/platform/platform.cpp @@ -32,7 +32,11 @@ executorch::runtime::Result load_library( std::string utf8 = path.u8string(); auto lib_handle = LoadLibrary(utf8.c_str()); if (lib_handle == NULL) { - ET_LOG(Error, "Failed to load %s with error: %lu", utf8.c_str(), GetLastError()); + ET_LOG( + Error, + "Failed to load %s with error: %lu", + utf8.c_str(), + GetLastError()); return executorch::runtime::Error::AccessFailed; } @@ -40,7 +44,8 @@ executorch::runtime::Result load_library( std::string path_str = path.string(); void* lib_handle = dlopen(path_str.c_str(), RTLD_LAZY | RTLD_LOCAL); if (lib_handle == nullptr) { - ET_LOG(Error, "Failed to load %s with error: %s", path_str.c_str(), dlerror()); + ET_LOG( + Error, "Failed to load %s with error: %s", path_str.c_str(), dlerror()); return executorch::runtime::Error::AccessFailed; } #endif From 56734d28d2b4bcd165da7c3121c83b5de5bcd752 Mon Sep 17 00:00:00 2001 From: Jacob Szwejbka Date: Thu, 16 Oct 2025 15:05:42 -0700 Subject: [PATCH 21/21] cmake lint --- backends/cuda/CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/backends/cuda/CMakeLists.txt b/backends/cuda/CMakeLists.txt index d790051eb0c..8b94351d469 100644 --- a/backends/cuda/CMakeLists.txt +++ b/backends/cuda/CMakeLists.txt @@ -36,9 +36,12 @@ find_package_torch() # CUDA-specific AOTI functionality set(_aoti_cuda_sources - runtime/cuda_backend.cpp runtime/shims/memory.cpp - runtime/shims/tensor_attribute.cpp runtime/guard.cpp - runtime/shims/cuda_guard.cpp runtime/shims/int4mm.cu + runtime/cuda_backend.cpp + runtime/shims/memory.cpp + runtime/shims/tensor_attribute.cpp + runtime/guard.cpp + runtime/shims/cuda_guard.cpp + runtime/shims/int4mm.cu runtime/platform/platform.cpp ) add_library(aoti_cuda STATIC ${_aoti_cuda_sources})