diff --git a/qa/L0_shared_memory/shared_memory_test.py b/qa/L0_shared_memory/shared_memory_test.py
index 35667aacfa..63f3700c05 100755
--- a/qa/L0_shared_memory/shared_memory_test.py
+++ b/qa/L0_shared_memory/shared_memory_test.py
@@ -113,10 +113,6 @@ def _configure_server(
             shm_op1_handle,
         ]
         # Implicit assumption that input and output byte_sizes are 64 bytes for now
-        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
-        input1_data = np.ones(shape=16, dtype=np.int32)
-        shm.set_shared_memory_region(shm_ip0_handle, [input0_data])
-        shm.set_shared_memory_region(shm_ip1_handle, [input1_data])
         self.triton_client.register_system_shared_memory(
             "input0_data", "/input0_data", register_byte_size, offset=register_offset
         )
@@ -129,6 +125,16 @@ def _configure_server(
         self.triton_client.register_system_shared_memory(
             "output1_data", "/output1_data", register_byte_size, offset=register_offset
         )
+
+        # Write data to shared memory regions
+        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+        input1_data = np.ones(shape=16, dtype=np.int32)
+        shm.set_shared_memory_region(
+            shm_ip0_handle, [input0_data], offset=register_offset
+        )
+        shm.set_shared_memory_region(
+            shm_ip1_handle, [input1_data], offset=register_offset
+        )
         self.shm_names = ["input0_data", "input1_data", "output0_data", "output1_data"]
 
     def _cleanup_shm_handles(self):
@@ -292,6 +298,40 @@ def test_too_big_shm(self):
         self._shm_handles.append(shm_ip2_handle)
         self._cleanup_shm_handles()
 
+    def test_large_shm_register_offset(self):
+        # Test for out of bounds read vulnerability when registering system shared memory with large offset
+        for platform in ["python", "onnx", "libtorch", "plan", "openvino"]:
+            model_name = f"{platform}_int32_int32_int32"
+
+            # Test for large offset
+            error_msg = []
+            page_size = os.sysconf("SC_PAGE_SIZE")
+            # Create a large shm size (page_size * 1024 is large enough to reproduce a segfault).
+            # Register offset at 1 page before the end of the shm region to give enough space for the input/output data.
+            create_byte_size = page_size * 1024
+            register_offset = page_size * 1023
+            self._configure_server(
+                create_byte_size=create_byte_size,
+                register_offset=register_offset,
+            )
+
+            iu.shm_basic_infer(
+                self,
+                self.triton_client,
+                self._shm_handles[0],
+                self._shm_handles[1],
+                self._shm_handles[2],
+                self._shm_handles[3],
+                error_msg,
+                register_offset=register_offset,
+                protocol=self.protocol,
+                use_system_shared_memory=True,
+                override_model_name=model_name,
+            )
+            self.triton_client.unregister_system_shared_memory()
+            if len(error_msg) > 0:
+                raise Exception(str(error_msg))
+
     def test_mixed_raw_shm(self):
         # Mix of shared memory and RAW inputs
         error_msg = []
diff --git a/qa/L0_shared_memory/test.sh b/qa/L0_shared_memory/test.sh
index 55cdb80951..d34ce1a4e5 100755
--- a/qa/L0_shared_memory/test.sh
+++ b/qa/L0_shared_memory/test.sh
@@ -25,12 +25,26 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [ "$#" -ge 1 ]; then
+    REPO_VERSION=$1
+fi
+if [ -z "$REPO_VERSION" ]; then
+    echo -e "Repository version must be specified"
+    echo -e "\n***\n*** Test Failed\n***"
+    exit 1
+fi
+if [ ! -z "$TEST_REPO_ARCH" ]; then
-z "$TEST_REPO_ARCH" ]; then + REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH} +fi + CLIENT_LOG="./client.log" SHM_TEST=shared_memory_test.py TEST_RESULT_FILE='test_results.txt' # Configure to support test on jetson as well TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"} +DATADIR=/data/inferenceserver/${REPO_VERSION} SERVER=${TRITON_DIR}/bin/tritonserver BACKEND_DIR=${TRITON_DIR}/backends SERVER_ARGS_EXTRA="--backend-directory=${BACKEND_DIR}" @@ -142,6 +156,60 @@ for test_case in \ done done +# Test large system shared memory offset +rm -rf models/* +# prepare add_sub model of various backends +BACKENDS="python onnx libtorch plan openvino" +for backend in ${BACKENDS} ; do + model="${backend}_int32_int32_int32" + model_dir="models/${model}" + if [[ $backend == "python" ]]; then + mkdir -p ${model_dir}/1 + cp ../python_models/add_sub/model.py ${model_dir}/1/ + cp ../python_models/add_sub/config.pbtxt ${model_dir}/ + sed -i 's/TYPE_FP32/TYPE_INT32/g' ${model_dir}/config.pbtxt + echo "max_batch_size: 8" >> ${model_dir}/config.pbtxt + else + mkdir -p ${model_dir} + cp -r $DATADIR/qa_model_repository/${model}/1 ${model_dir}/1 + cp $DATADIR/qa_model_repository/${model}/config.pbtxt ${model_dir}/ + cp $DATADIR/qa_model_repository/${model}/output0_labels.txt ${model_dir}/ + if [ $backend == "openvino" ]; then + echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/${model}/config.pbtxt + fi + fi +done + +test_case="test_large_shm_register_offset" +for client_type in http grpc; do + SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 ${SERVER_ARGS_EXTRA}" + SERVER_LOG="./${test_case}.${client_type}.server.log" + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 + fi + + export CLIENT_TYPE=$client_type + CLIENT_LOG="./${test_case}.${client_type}.client.log" + set +e + python3 $SHM_TEST SharedMemoryTest.${test_case} >>"$CLIENT_LOG" 2>&1 + if [ $? -ne 0 ]; then + cat $CLIENT_LOG + echo -e "\n***\n*** Test Failed - ${client_type}\n***" + RET=1 + fi + + kill $SERVER_PID + wait $SERVER_PID + if [ $? -ne 0 ]; then + echo -e "\n***\n*** Test Server shut down non-gracefully\n***" + RET=1 + fi + set -e +done + if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" else diff --git a/qa/common/infer_util.py b/qa/common/infer_util.py index edaf0ede47..7e9c775570 100755 --- a/qa/common/infer_util.py +++ b/qa/common/infer_util.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -1367,11 +1367,13 @@ def shm_basic_infer( big_shm_name="", big_shm_size=64, default_shm_byte_size=64, + register_offset=0, shm_output_offset=0, shm_output_byte_size=64, protocol="http", use_system_shared_memory=False, use_cuda_shared_memory=False, + override_model_name=None, ): # Lazy shm imports... 
     if use_system_shared_memory:
@@ -1381,6 +1383,16 @@ def shm_basic_infer(
     else:
         raise Exception("No shared memory type specified")
 
+    if override_model_name is None:
+        model_name = "simple"
+    else:
+        model_name = override_model_name
+
+    if model_name.startswith("libtorch"):
+        output_names = ["OUTPUT__0", "OUTPUT__1"]
+    else:
+        output_names = ["OUTPUT0", "OUTPUT1"]
+
     input0_data = np.arange(start=0, stop=16, dtype=np.int32)
     input1_data = np.ones(shape=16, dtype=np.int32)
     inputs = []
@@ -1388,13 +1400,17 @@
     if protocol == "http":
         inputs.append(httpclient.InferInput("INPUT0", [1, 16], "INT32"))
         inputs.append(httpclient.InferInput("INPUT1", [1, 16], "INT32"))
-        outputs.append(httpclient.InferRequestedOutput("OUTPUT0", binary_data=True))
-        outputs.append(httpclient.InferRequestedOutput("OUTPUT1", binary_data=False))
+        outputs.append(
+            httpclient.InferRequestedOutput(output_names[0], binary_data=True)
+        )
+        outputs.append(
+            httpclient.InferRequestedOutput(output_names[1], binary_data=False)
+        )
     else:
         inputs.append(grpcclient.InferInput("INPUT0", [1, 16], "INT32"))
         inputs.append(grpcclient.InferInput("INPUT1", [1, 16], "INT32"))
-        outputs.append(grpcclient.InferRequestedOutput("OUTPUT0"))
-        outputs.append(grpcclient.InferRequestedOutput("OUTPUT1"))
+        outputs.append(grpcclient.InferRequestedOutput(output_names[0]))
+        outputs.append(grpcclient.InferRequestedOutput(output_names[1]))
 
     inputs[0].set_shared_memory("input0_data", default_shm_byte_size)
 
@@ -1414,9 +1430,9 @@
 
     try:
         results = triton_client.infer(
-            "simple", inputs, model_version="", outputs=outputs
+            model_name, inputs, model_version="", outputs=outputs
         )
-        output = results.get_output("OUTPUT0")
+        output = results.get_output(output_names[0])
         if protocol == "http":
             output_datatype = output["datatype"]
             output_shape = output["shape"]
@@ -1427,11 +1443,16 @@
 
         if use_system_shared_memory:
             output_data = shm.get_contents_as_numpy(
-                shm_op0_handle, output_dtype, output_shape
+                shm_op0_handle,
+                output_dtype,
+                output_shape,
+                offset=register_offset + shm_output_offset,
             )
         elif use_cuda_shared_memory:
             output_data = cudashm.get_contents_as_numpy(
-                shm_op0_handle, output_dtype, output_shape
+                shm_op0_handle,
+                output_dtype,
+                output_shape,
             )
 
         tester.assertTrue(
diff --git a/src/shared_memory_manager.cc b/src/shared_memory_manager.cc
index 80b739ee1b..a2d52f5f48 100644
--- a/src/shared_memory_manager.cc
+++ b/src/shared_memory_manager.cc
@@ -524,12 +524,7 @@ SharedMemoryManager::GetMemoryInfo(
     *shm_info = std::static_pointer_cast<const SharedMemoryInfo>(it->second);
   }
 
-  if (it->second->kind_ == TRITONSERVER_MEMORY_CPU) {
-    *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ +
-                               it->second->offset_ + offset);
-  } else {
-    *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + offset);
-  }
+  *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + offset);
 
   *memory_type = it->second->kind_;
   *device_id = it->second->device_id_;