diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc
index e0b31c29a054b..961012536126b 100644
--- a/onnxruntime/core/framework/tensorprotoutils.cc
+++ b/onnxruntime/core/framework/tensorprotoutils.cc
@@ -329,7 +329,8 @@ Status TensorProtoWithExternalDataToTensorProto(
}
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
- const std::filesystem::path& location) {
+ const std::filesystem::path& location,
+ const std::filesystem::path& model_path) {
// Reject absolute paths
ORT_RETURN_IF(location.is_absolute(),
"Absolute paths not allowed for external data location");
@@ -337,14 +338,39 @@ Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
// Resolve and verify the path stays within model directory
auto base_canonical = std::filesystem::weakly_canonical(base_dir);
// If the symlink exists, it resolves to the target path;
- // so if the symllink is outside the directory it would be caught here.
+ // so if the symlink is outside the directory it would be caught here.
auto resolved = std::filesystem::weakly_canonical(base_dir / location);
+
// Check that resolved path starts with base directory
auto [base_end, resolved_it] = std::mismatch(
base_canonical.begin(), base_canonical.end(),
resolved.begin(), resolved.end());
- ORT_RETURN_IF(base_end != base_canonical.end(),
- "External data path: ", location, " escapes model directory: ", base_dir);
+
+ if (base_end != base_canonical.end()) {
+ // If validation against logical base_dir fails, we check against the
+ // real (canonical) path of the model file to support symlinked models
+ // (e.g. models in Hugging Face Hub local cache).
+ if (!model_path.empty()) {
+ auto real_model_dir = std::filesystem::weakly_canonical(model_path).parent_path();
+
+ auto [real_base_end, real_resolved_it] = std::mismatch(
+ real_model_dir.begin(), real_model_dir.end(),
+ resolved.begin(), resolved.end());
+
+ if (real_base_end == real_model_dir.end()) {
+ return Status::OK();
+ }
+
+ return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
+ "External data path: ", location, " (resolved path: ", resolved,
+ ") escapes both model directory: ", base_dir,
+ " and real model directory: ", real_model_dir);
+ }
+
+ return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
+ "External data path: ", location, " (resolved path: ", resolved,
+ ") escapes model directory: ", base_dir);
+ }
}
return Status::OK();
}
diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h
index 685fa65a73720..941cd9af34b61 100644
--- a/onnxruntime/core/framework/tensorprotoutils.h
+++ b/onnxruntime/core/framework/tensorprotoutils.h
@@ -526,16 +526,19 @@ Status TensorProtoWithExternalDataToTensorProto(
ONNX_NAMESPACE::TensorProto& new_tensor_proto);
///
-/// The functions will make sure the 'location' specified in the external data is under the 'base_dir'.
+/// Validates if the external data path is under the model directory.
+/// If the model is a symlink, it checks against both the logical model directory (base_dir)
+/// and the real/canonical directory of the model.
/// If the `base_dir` is empty, the function only ensures that `location` is not an absolute path.
///
-/// model location directory
-/// location is a string retrieved from TensorProto external data that is not
-/// an in-memory tag
-/// The function will fail if the resolved full path is not under the model directory
-/// or one of the subdirectories
+/// Logical model location directory
+/// Location string retrieved from TensorProto external data
+/// Optional path to the model file, used for canonical path validation if base_dir check fails
+/// The function will fail if the resolved full path is not under the logical model directory
+/// nor the real directory of the model path
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
- const std::filesystem::path& location);
+ const std::filesystem::path& location,
+ const std::filesystem::path& model_path = {});
#endif // !defined(SHARED_PROVIDER)
diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index 779ca5d180518..5aa466ecb5bc7 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -3771,7 +3771,7 @@ Status Graph::ConvertInitializersIntoOrtValues() {
std::unique_ptr external_data_info;
ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info));
const auto& location = external_data_info->GetRelPath();
- auto st = utils::ValidateExternalDataPath(model_dir, location);
+ auto st = utils::ValidateExternalDataPath(model_dir, location, model_path);
if (!st.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"External data path validation failed for initializer: ", tensor_proto.name(),
diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h
index f5421d8540db8..1ed78c89e722d 100644
--- a/onnxruntime/core/providers/shared_library/provider_api.h
+++ b/onnxruntime/core/providers/shared_library/provider_api.h
@@ -453,11 +453,6 @@ inline bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto
return g_host->Utils__HasExternalDataInMemory(ten_proto);
}
-inline Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
- const std::filesystem::path& location) {
- return g_host->Utils__ValidateExternalDataPath(base_dir, location);
-}
-
} // namespace utils
namespace graph_utils {
diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h
index aeaf05cf14591..9cbbc6234a99b 100644
--- a/onnxruntime/core/providers/shared_library/provider_interfaces.h
+++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h
@@ -1004,9 +1004,6 @@ struct ProviderHost {
virtual bool Utils__HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto) = 0;
- virtual Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
- const std::filesystem::path& location) = 0;
-
// Model
virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 6949ed0059add..e5bbd656bc325 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -1295,11 +1295,6 @@ struct ProviderHostImpl : ProviderHost {
return onnxruntime::utils::HasExternalDataInMemory(ten_proto);
}
- Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
- const std::filesystem::path& location) override {
- return onnxruntime::utils::ValidateExternalDataPath(base_path, location);
- }
-
// Model (wrapped)
std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
diff --git a/onnxruntime/test/python/onnxruntime_test_python_symlink_data.py b/onnxruntime/test/python/onnxruntime_test_python_symlink_data.py
new file mode 100644
index 0000000000000..ea3c0f9ca9904
--- /dev/null
+++ b/onnxruntime/test/python/onnxruntime_test_python_symlink_data.py
@@ -0,0 +1,250 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+import shutil
+import struct
+import tempfile
+import unittest
+
+import numpy as np
+from onnx import TensorProto, helper, save
+
+import onnxruntime as ort
+
+
+class TestSymLinkOnnxModelExternalData(unittest.TestCase):
+ def test_symlink_model_and_data_under_same_directory(self):
+ # The following directory structure simulates huggingface hub local cache:
+ # temp_dir/ (This corresponds to .cache/huggingface/hub/model_id/)
+ # blobs/
+ # guid1
+ # guid2
+ # snapshots/version/
+ # model.onnx -> ../../blobs/guid1
+ # data.bin -> ../../blobs/guid2
+
+ self.temp_dir = tempfile.mkdtemp()
+ try:
+ blobs_dir = os.path.join(self.temp_dir, "blobs")
+ os.makedirs(blobs_dir)
+
+ snapshots_dir = os.path.join(self.temp_dir, "snapshots", "version")
+ os.makedirs(snapshots_dir)
+
+ # Create real files in blobs
+ # We'll use the helper to create the model, but we need to control where files end up.
+ # Let's manually create the data file in blobs
+ data_blob_path = os.path.join(blobs_dir, "guid2")
+ vals = [float(i) for i in range(10)]
+ with open(data_blob_path, "wb") as f:
+ f.writelines(struct.pack("f", v) for v in vals)
+
+ # Create model in blobs (referencing "data.bin" as external data)
+ # When loaded from snapshots/version/model.onnx, ORT looks for snapshots/version/data.bin
+
+ input_ = helper.make_tensor_value_info("input", TensorProto.FLOAT, [10])
+ output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [10])
+ tensor = helper.make_tensor("external_data", TensorProto.FLOAT, [10], vals)
+ tensor.data_location = TensorProto.EXTERNAL
+ tensor.ClearField("float_data")
+ tensor.ClearField("raw_data")
+
+ k = tensor.external_data.add()
+ k.key = "location"
+ k.value = "data.bin" # Relative path
+
+ offset = tensor.external_data.add()
+ offset.key = "offset"
+ offset.value = "0"
+
+ length = tensor.external_data.add()
+ length.key = "length"
+ length.value = str(len(vals) * 4)
+
+ const_node = helper.make_node("Constant", [], ["const_out"], value=tensor)
+ add_node = helper.make_node("Add", ["input", "const_out"], ["output"])
+ graph = helper.make_graph([const_node, add_node], "test_graph", [input_], [output])
+ model = helper.make_model(graph)
+
+ model_blob_path = os.path.join(blobs_dir, "guid1")
+ save(model, model_blob_path)
+
+ # Now create symlinks in snapshots
+ model_symlink_path = os.path.join(snapshots_dir, "model.onnx")
+ data_symlink_path = os.path.join(snapshots_dir, "data.bin")
+
+ try:
+ os.symlink(model_blob_path, model_symlink_path)
+ os.symlink(data_blob_path, data_symlink_path)
+ except (OSError, NotImplementedError) as e:
+ self.skipTest(f"Skipping symlink test: symlink creation is not supported in this environment: {e}")
+
+ sess = ort.InferenceSession(model_symlink_path, providers=["CPUExecutionProvider"])
+
+ input_data = np.zeros(10, dtype=np.float32)
+ res = sess.run(["output"], {"input": input_data})
+ expected = np.array([float(i) for i in range(10)], dtype=np.float32)
+ np.testing.assert_allclose(res[0], expected)
+
+ finally:
+ shutil.rmtree(self.temp_dir)
+
+ def test_symlink_with_data_in_model_sub_dir(self):
+ # working directory structure (data is in model sub directory):
+ # temp_dir/
+ # blobs/
+ # guid1
+ # data/guid2
+ # snapshots/version/
+ # model.onnx -> ../../blobs/guid1
+ # data.bin -> ../../blobs/data/guid2
+
+ self.temp_dir = tempfile.mkdtemp()
+ try:
+ blobs_dir = os.path.join(self.temp_dir, "blobs")
+ os.makedirs(blobs_dir)
+ data_dir = os.path.join(blobs_dir, "data")
+ os.makedirs(data_dir)
+
+ snapshots_dir = os.path.join(self.temp_dir, "snapshots", "version")
+ os.makedirs(snapshots_dir)
+
+ # Create real files in blobs
+ # We'll use the helper to create the model, but we need to control where files end up.
+ # Let's manually create the data file in blobs
+ data_blob_path = os.path.join(data_dir, "guid2")
+ vals = [float(i) for i in range(10)]
+ with open(data_blob_path, "wb") as f:
+ f.writelines(struct.pack("f", v) for v in vals)
+
+ # Create model in blobs (referencing "data.bin" as external data)
+ # When loaded from snapshots/version/model.onnx, ORT looks for snapshots/version/data.bin
+
+ input_ = helper.make_tensor_value_info("input", TensorProto.FLOAT, [10])
+ output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [10])
+ tensor = helper.make_tensor("external_data", TensorProto.FLOAT, [10], vals)
+ tensor.data_location = TensorProto.EXTERNAL
+ tensor.ClearField("float_data")
+ tensor.ClearField("raw_data")
+
+ k = tensor.external_data.add()
+ k.key = "location"
+ k.value = "data.bin" # Relative path
+
+ offset = tensor.external_data.add()
+ offset.key = "offset"
+ offset.value = "0"
+
+ length = tensor.external_data.add()
+ length.key = "length"
+ length.value = str(len(vals) * 4)
+
+ const_node = helper.make_node("Constant", [], ["const_out"], value=tensor)
+ add_node = helper.make_node("Add", ["input", "const_out"], ["output"])
+ graph = helper.make_graph([const_node, add_node], "test_graph", [input_], [output])
+ model = helper.make_model(graph)
+
+ model_blob_path = os.path.join(blobs_dir, "guid1")
+ save(model, model_blob_path)
+
+ # Now create symlinks in snapshots
+ model_symlink_path = os.path.join(snapshots_dir, "model.onnx")
+ data_symlink_path = os.path.join(snapshots_dir, "data.bin")
+
+ try:
+ os.symlink(model_blob_path, model_symlink_path)
+ os.symlink(data_blob_path, data_symlink_path)
+ except (OSError, NotImplementedError) as e:
+ self.skipTest(f"Skipping symlink test: symlink creation is not supported in this environment: {e}")
+
+ sess = ort.InferenceSession(model_symlink_path, providers=["CPUExecutionProvider"])
+
+ input_data = np.zeros(10, dtype=np.float32)
+ res = sess.run(["output"], {"input": input_data})
+ expected = np.array([float(i) for i in range(10)], dtype=np.float32)
+ np.testing.assert_allclose(res[0], expected)
+
+ finally:
+ shutil.rmtree(self.temp_dir)
+
+ def test_symlink_with_data_not_in_model_sub_dir(self):
+ # working directory structure (data is not in model directory or its sub directories):
+ # temp_dir/
+ # model/
+ # guid1
+ # data/
+ # guid2
+ # snapshots/version/
+ # model.onnx -> ../../model/guid1
+ # data.bin -> ../../data/guid2
+
+ self.temp_dir = tempfile.mkdtemp()
+ try:
+ model_dir = os.path.join(self.temp_dir, "model")
+ os.makedirs(model_dir)
+ data_dir = os.path.join(self.temp_dir, "data")
+ os.makedirs(data_dir)
+
+ snapshots_dir = os.path.join(self.temp_dir, "snapshots", "version")
+ os.makedirs(snapshots_dir)
+
+ # Create real files in data_dir
+ # We'll use the helper to create the model, but we need to control where files end up.
+ # Let's manually create the data file in data_dir
+ data_blob_path = os.path.join(data_dir, "guid2")
+ vals = [float(i) for i in range(10)]
+ with open(data_blob_path, "wb") as f:
+ f.writelines(struct.pack("f", v) for v in vals)
+
+ # Create model in model_dir (referencing "data.bin" as external data)
+ # When loaded from snapshots/version/model.onnx, ORT looks for snapshots/version/data.bin
+
+ input_ = helper.make_tensor_value_info("input", TensorProto.FLOAT, [10])
+ output = helper.make_tensor_value_info("output", TensorProto.FLOAT, [10])
+ tensor = helper.make_tensor("external_data", TensorProto.FLOAT, [10], vals)
+ tensor.data_location = TensorProto.EXTERNAL
+ tensor.ClearField("float_data")
+ tensor.ClearField("raw_data")
+
+ k = tensor.external_data.add()
+ k.key = "location"
+ k.value = "data.bin" # Relative path
+
+ offset = tensor.external_data.add()
+ offset.key = "offset"
+ offset.value = "0"
+
+ length = tensor.external_data.add()
+ length.key = "length"
+ length.value = str(len(vals) * 4)
+
+ const_node = helper.make_node("Constant", [], ["const_out"], value=tensor)
+ add_node = helper.make_node("Add", ["input", "const_out"], ["output"])
+ graph = helper.make_graph([const_node, add_node], "test_graph", [input_], [output])
+ model = helper.make_model(graph)
+
+ model_blob_path = os.path.join(model_dir, "guid1")
+ save(model, model_blob_path)
+
+ # Now create symlinks in snapshots
+ model_symlink_path = os.path.join(snapshots_dir, "model.onnx")
+ data_symlink_path = os.path.join(snapshots_dir, "data.bin")
+
+ try:
+ os.symlink(model_blob_path, model_symlink_path)
+ os.symlink(data_blob_path, data_symlink_path)
+ except (OSError, NotImplementedError) as e:
+ self.skipTest(f"Skipping symlink test: symlink creation is not supported in this environment: {e}")
+
+ with self.assertRaises(Exception) as cm:
+ ort.InferenceSession(model_symlink_path, providers=["CPUExecutionProvider"])
+
+ # We expect an error about external data not under model directory or the real model directory.
+ self.assertIn("External data path validation failed", str(cm.exception))
+ finally:
+ shutil.rmtree(self.temp_dir)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 05e1f96b7610c..0862462d2d06d 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -1816,6 +1816,9 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
onnx_test = False
if onnx_test:
+ log.info("Testing Symlink ONNX Model and External Data")
+ run_subprocess([sys.executable, "onnxruntime_test_python_symlink_data.py"], cwd=cwd, dll_path=dll_path)
+
# Disable python onnx tests for TensorRT and CANN EP, because many tests are
# not supported yet.
if args.use_tensorrt or args.use_cann: