48 changes: 44 additions & 4 deletions qa/L0_shared_memory/shared_memory_test.py
@@ -113,10 +113,6 @@ def _configure_server(
shm_op1_handle,
]
# Implicit assumption that input and output byte_sizes are 64 bytes for now
-        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
-        input1_data = np.ones(shape=16, dtype=np.int32)
-        shm.set_shared_memory_region(shm_ip0_handle, [input0_data])
-        shm.set_shared_memory_region(shm_ip1_handle, [input1_data])
self.triton_client.register_system_shared_memory(
"input0_data", "/input0_data", register_byte_size, offset=register_offset
)
@@ -129,6 +125,16 @@ def _configure_server(
self.triton_client.register_system_shared_memory(
"output1_data", "/output1_data", register_byte_size, offset=register_offset
)
+
+        # Write data to shared memory regions
+        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+        input1_data = np.ones(shape=16, dtype=np.int32)
+        shm.set_shared_memory_region(
+            shm_ip0_handle, [input0_data], offset=register_offset
+        )
+        shm.set_shared_memory_region(
+            shm_ip1_handle, [input1_data], offset=register_offset
+        )
self.shm_names = ["input0_data", "input1_data", "output0_data", "output1_data"]

def _cleanup_shm_handles(self):
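The reordering above matters because the region is now registered register_offset bytes into the created block, so the input tensors must be written at that same offset for the server to see them. A minimal stdlib-only sketch of the pattern, using a hypothetical /dev/shm path rather than the tritonclient shm helpers the test relies on:

    import mmap
    import os

    import numpy as np

    # Create a shm block, then write a 64-byte int32 tensor at the offset
    # where the region will be registered (hypothetical standalone sketch).
    page_size = os.sysconf("SC_PAGE_SIZE")
    create_byte_size = page_size * 1024
    register_offset = page_size * 1023

    fd = os.open("/dev/shm/input0_data", os.O_CREAT | os.O_RDWR, 0o600)
    os.ftruncate(fd, create_byte_size)
    region = mmap.mmap(fd, create_byte_size)

    input0_data = np.arange(start=0, stop=16, dtype=np.int32)
    start = register_offset
    region[start : start + input0_data.nbytes] = input0_data.tobytes()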
@@ -292,6 +298,40 @@ def test_too_big_shm(self):
self._shm_handles.append(shm_ip2_handle)
self._cleanup_shm_handles()

+    def test_large_shm_register_offset(self):
+        # Test for out of bounds read vulnerability when registering system shared memory with large offset
+        for platform in ["python", "onnx", "libtorch", "plan", "openvino"]:
+            model_name = f"{platform}_int32_int32_int32"
+
+            # Test for large offset
+            error_msg = []
+            page_size = os.sysconf("SC_PAGE_SIZE")
+            # Create a large shm size (page_size * 1024 is large enough to reproduce a segfault).
+            # Register offset at 1 page before the end of the shm region to give enough space for the input/output data.
+            create_byte_size = page_size * 1024
+            register_offset = page_size * 1023
+            self._configure_server(
+                create_byte_size=create_byte_size,
+                register_offset=register_offset,
+            )
+
+            iu.shm_basic_infer(
+                self,
+                self.triton_client,
+                self._shm_handles[0],
+                self._shm_handles[1],
+                self._shm_handles[2],
+                self._shm_handles[3],
+                error_msg,
+                register_offset=register_offset,
+                protocol=self.protocol,
+                use_system_shared_memory=True,
+                override_model_name=model_name,
+            )
+            self.triton_client.unregister_system_shared_memory()
+            if len(error_msg) > 0:
+                raise Exception(str(error_msg))
+
def test_mixed_raw_shm(self):
# Mix of shared memory and RAW inputs
error_msg = []
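The size choices in the new test are deliberate: the registered window is the last page of a 1024-page block, so the 64-byte tensors fit inside it, while any code path that applies the registration offset a second time lands far outside the mapping. The arithmetic, assuming a typical 4 KiB page:

    page_size = 4096                     # typical SC_PAGE_SIZE
    create_byte_size = page_size * 1024  # 4 MiB shm block
    register_offset = page_size * 1023   # register only the final page

    # A 64-byte tensor fits inside the registered page.
    assert register_offset + 64 <= create_byte_size

    # Applying the registration offset twice overshoots the block by ~4 MiB,
    # which is the out-of-bounds access the test is designed to catch.
    assert 2 * register_offset > create_byte_size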
68 changes: 68 additions & 0 deletions qa/L0_shared_memory/test.sh
@@ -25,12 +25,26 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [ "$#" -ge 1 ]; then
+    REPO_VERSION=$1
+fi
+if [ -z "$REPO_VERSION" ]; then
+    echo -e "Repository version must be specified"
+    echo -e "\n***\n*** Test Failed\n***"
+    exit 1
+fi
+if [ ! -z "$TEST_REPO_ARCH" ]; then
+    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
+fi
+
CLIENT_LOG="./client.log"
SHM_TEST=shared_memory_test.py
TEST_RESULT_FILE='test_results.txt'

# Configure to support test on jetson as well
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
+DATADIR=/data/inferenceserver/${REPO_VERSION}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS_EXTRA="--backend-directory=${BACKEND_DIR}"
@@ -142,6 +156,60 @@ for test_case in \
done
done

+# Test large system shared memory offset
+rm -rf models/*
+# prepare add_sub model of various backends
+BACKENDS="python onnx libtorch plan openvino"
+for backend in ${BACKENDS} ; do
+    model="${backend}_int32_int32_int32"
+    model_dir="models/${model}"
+    if [[ $backend == "python" ]]; then
+        mkdir -p ${model_dir}/1
+        cp ../python_models/add_sub/model.py ${model_dir}/1/
+        cp ../python_models/add_sub/config.pbtxt ${model_dir}/
+        sed -i 's/TYPE_FP32/TYPE_INT32/g' ${model_dir}/config.pbtxt
+        echo "max_batch_size: 8" >> ${model_dir}/config.pbtxt
+    else
+        mkdir -p ${model_dir}
+        cp -r $DATADIR/qa_model_repository/${model}/1 ${model_dir}/1
+        cp $DATADIR/qa_model_repository/${model}/config.pbtxt ${model_dir}/
+        cp $DATADIR/qa_model_repository/${model}/output0_labels.txt ${model_dir}/
+        if [ $backend == "openvino" ]; then
+            echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/${model}/config.pbtxt
+        fi
+    fi
+done
+
+test_case="test_large_shm_register_offset"
+for client_type in http grpc; do
+    SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 ${SERVER_ARGS_EXTRA}"
+    SERVER_LOG="./${test_case}.${client_type}.server.log"
+    run_server
+    if [ "$SERVER_PID" == "0" ]; then
+        echo -e "\n***\n*** Failed to start $SERVER\n***"
+        cat $SERVER_LOG
+        exit 1
+    fi
+
+    export CLIENT_TYPE=$client_type
+    CLIENT_LOG="./${test_case}.${client_type}.client.log"
+    set +e
+    python3 $SHM_TEST SharedMemoryTest.${test_case} >>"$CLIENT_LOG" 2>&1
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Failed - ${client_type}\n***"
+        RET=1
+    fi
+
+    kill $SERVER_PID
+    wait $SERVER_PID
+    if [ $? -ne 0 ]; then
+        echo -e "\n***\n*** Test Server shut down non-gracefully\n***"
+        RET=1
+    fi
+    set -e
+done
+
if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
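The loop drives the same unittest case once per protocol by exporting CLIENT_TYPE before invoking the test. Presumably the test's setup translates that variable into the client protocol along these lines (a sketch of the assumed harness behavior, not code from this PR):

    import os

    # Assumed mapping from the CLIENT_TYPE environment variable to the
    # protocol and endpoint used by SharedMemoryTest (hypothetical).
    client_type = os.environ.get("CLIENT_TYPE", "http")
    protocol = "grpc" if client_type == "grpc" else "http"
    url = "localhost:8001" if protocol == "grpc" else "localhost:8000"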
39 changes: 30 additions & 9 deletions qa/common/infer_util.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

-# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -1367,11 +1367,13 @@ def shm_basic_infer(
big_shm_name="",
big_shm_size=64,
default_shm_byte_size=64,
+    register_offset=0,
shm_output_offset=0,
shm_output_byte_size=64,
protocol="http",
use_system_shared_memory=False,
use_cuda_shared_memory=False,
+    override_model_name=None,
):
# Lazy shm imports...
if use_system_shared_memory:
@@ -1381,20 +1383,34 @@
else:
raise Exception("No shared memory type specified")

+    if override_model_name is None:
+        model_name = "simple"
+    else:
+        model_name = override_model_name
+
+    if model_name.startswith("libtorch"):
+        output_names = ["OUTPUT__0", "OUTPUT__1"]
+    else:
+        output_names = ["OUTPUT0", "OUTPUT1"]
+
input0_data = np.arange(start=0, stop=16, dtype=np.int32)
input1_data = np.ones(shape=16, dtype=np.int32)
inputs = []
outputs = []
if protocol == "http":
inputs.append(httpclient.InferInput("INPUT0", [1, 16], "INT32"))
inputs.append(httpclient.InferInput("INPUT1", [1, 16], "INT32"))
-        outputs.append(httpclient.InferRequestedOutput("OUTPUT0", binary_data=True))
-        outputs.append(httpclient.InferRequestedOutput("OUTPUT1", binary_data=False))
+        outputs.append(
+            httpclient.InferRequestedOutput(output_names[0], binary_data=True)
+        )
+        outputs.append(
+            httpclient.InferRequestedOutput(output_names[1], binary_data=False)
+        )
else:
inputs.append(grpcclient.InferInput("INPUT0", [1, 16], "INT32"))
inputs.append(grpcclient.InferInput("INPUT1", [1, 16], "INT32"))
-        outputs.append(grpcclient.InferRequestedOutput("OUTPUT0"))
-        outputs.append(grpcclient.InferRequestedOutput("OUTPUT1"))
+        outputs.append(grpcclient.InferRequestedOutput(output_names[0]))
+        outputs.append(grpcclient.InferRequestedOutput(output_names[1]))

inputs[0].set_shared_memory("input0_data", default_shm_byte_size)

@@ -1414,9 +1430,9 @@

try:
        results = triton_client.infer(
-            "simple", inputs, model_version="", outputs=outputs
+            model_name, inputs, model_version="", outputs=outputs
        )
-        output = results.get_output("OUTPUT0")
+        output = results.get_output(output_names[0])
if protocol == "http":
output_datatype = output["datatype"]
output_shape = output["shape"]
@@ -1427,11 +1443,16 @@

if use_system_shared_memory:
output_data = shm.get_contents_as_numpy(
-                shm_op0_handle, output_dtype, output_shape
+                shm_op0_handle,
+                output_dtype,
+                output_shape,
+                offset=register_offset + shm_output_offset,
)
elif use_cuda_shared_memory:
output_data = cudashm.get_contents_as_numpy(
-                shm_op0_handle, output_dtype, output_shape
+                shm_op0_handle,
+                output_dtype,
+                output_shape,
)

tester.assertTrue(
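Reading results back mirrors the write path: the output tensor sits register_offset bytes into the created block, plus any per-output offset within the registered window, which is why shm.get_contents_as_numpy now receives offset=register_offset + shm_output_offset. A stdlib-only sketch of the equivalent read, with a hypothetical /dev/shm path:

    import mmap
    import os

    import numpy as np

    page_size = os.sysconf("SC_PAGE_SIZE")
    register_offset = page_size * 1023
    shm_output_offset = 0

    fd = os.open("/dev/shm/output0_data", os.O_RDWR)
    region = mmap.mmap(fd, page_size * 1024)

    # Skip both offsets, then reinterpret the next 64 bytes as a 1x16 int32 tensor.
    start = register_offset + shm_output_offset
    output_data = np.frombuffer(region[start : start + 64], dtype=np.int32).reshape(1, 16)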
7 changes: 1 addition & 6 deletions src/shared_memory_manager.cc
@@ -524,12 +524,7 @@ SharedMemoryManager::GetMemoryInfo(
*shm_info = std::static_pointer_cast<const SharedMemoryInfo>(it->second);
}

-  if (it->second->kind_ == TRITONSERVER_MEMORY_CPU) {
-    *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ +
-                               it->second->offset_ + offset);
-  } else {
-    *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + offset);
-  }
+  *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + offset);

*memory_type = it->second->kind_;
*device_id = it->second->device_id_;
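This is the server-side fix. The removed branch added the stored registration offset_ to mapped_addr_ for CPU regions, but mapped_addr_ evidently already points at the registered offset within the shm block, so CPU accesses were shifted offset_ bytes too far: the out-of-bounds read the new test reproduces. A Python model of the address computation (illustrative only, not the actual implementation):

    def shm_address(mapped_addr: int, request_offset: int) -> int:
        # Fixed behavior for every memory kind: mapped_addr already
        # accounts for the registration offset.
        return mapped_addr + request_offset

    def shm_address_old_cpu(mapped_addr: int, registration_offset: int, request_offset: int) -> int:
        # Old CPU-only branch: the registration offset was applied a
        # second time, pushing reads past the end of the mapping.
        return mapped_addr + registration_offset + request_offset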