48 changes: 44 additions & 4 deletions qa/L0_shared_memory/shared_memory_test.py
@@ -113,10 +113,6 @@ def _configure_server(
shm_op1_handle,
]
# Implicit assumption that input and output byte_sizes are 64 bytes for now
-        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
-        input1_data = np.ones(shape=16, dtype=np.int32)
-        shm.set_shared_memory_region(shm_ip0_handle, [input0_data])
-        shm.set_shared_memory_region(shm_ip1_handle, [input1_data])
self.triton_client.register_system_shared_memory(
"input0_data", "/input0_data", register_byte_size, offset=register_offset
)
@@ -129,6 +125,16 @@ def _configure_server(
self.triton_client.register_system_shared_memory(
"output1_data", "/output1_data", register_byte_size, offset=register_offset
)
+
+        # Write data to shared memory regions
+        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+        input1_data = np.ones(shape=16, dtype=np.int32)
+        shm.set_shared_memory_region(
+            shm_ip0_handle, [input0_data], offset=register_offset
+        )
+        shm.set_shared_memory_region(
+            shm_ip1_handle, [input1_data], offset=register_offset
+        )
self.shm_names = ["input0_data", "input1_data", "output0_data", "output1_data"]

def _cleanup_shm_handles(self):
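The reordering above matters because the region is now registered register_offset bytes into the created block, so the input tensors must be written at that same offset for the server to see them. A minimal stdlib-only sketch of the pattern, using a hypothetical /dev/shm path rather than the tritonclient shm helpers the test relies on:

    import mmap
    import os

    import numpy as np

    # Create a shm block, then write a 64-byte int32 tensor at the offset
    # where the region will be registered (hypothetical standalone sketch).
    page_size = os.sysconf("SC_PAGE_SIZE")
    create_byte_size = page_size * 1024
    register_offset = page_size * 1023

    fd = os.open("/dev/shm/input0_data", os.O_CREAT | os.O_RDWR, 0o600)
    os.ftruncate(fd, create_byte_size)
    region = mmap.mmap(fd, create_byte_size)

    input0_data = np.arange(start=0, stop=16, dtype=np.int32)
    start = register_offset
    region[start : start + input0_data.nbytes] = input0_data.tobytes()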
@@ -292,6 +298,40 @@ def test_too_big_shm(self):
self._shm_handles.append(shm_ip2_handle)
self._cleanup_shm_handles()

+    def test_large_shm_register_offset(self):
+        # Test for out of bounds read vulnerability when registering system shared memory with large offset
+        for platform in ["python", "onnx", "libtorch", "plan", "openvino"]:
+            model_name = f"{platform}_int32_int32_int32"
+
+            # Test for large offset
+            error_msg = []
+            page_size = os.sysconf("SC_PAGE_SIZE")
+            # Create a large shm size (page_size * 1024 is large enough to reproduce a segfault).
+            # Register offset at 1 page before the end of the shm region to give enough space for the input/output data.
+            create_byte_size = page_size * 1024
+            register_offset = page_size * 1023
+            self._configure_server(
+                create_byte_size=create_byte_size,
+                register_offset=register_offset,
+            )
+
+            iu.shm_basic_infer(
+                self,
+                self.triton_client,
+                self._shm_handles[0],
+                self._shm_handles[1],
+                self._shm_handles[2],
+                self._shm_handles[3],
+                error_msg,
+                register_offset=register_offset,
+                protocol=self.protocol,
+                use_system_shared_memory=True,
+                override_model_name=model_name,
+            )
+            self.triton_client.unregister_system_shared_memory()
+            if len(error_msg) > 0:
+                raise Exception(str(error_msg))
+
def test_mixed_raw_shm(self):
# Mix of shared memory and RAW inputs
error_msg = []
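The size choices in the new test are deliberate: the registered window is the last page of a 1024-page block, so the 64-byte tensors fit inside it, while any code path that applies the registration offset a second time lands far outside the mapping. The arithmetic, assuming a typical 4 KiB page:

    page_size = 4096                     # typical SC_PAGE_SIZE
    create_byte_size = page_size * 1024  # 4 MiB shm block
    register_offset = page_size * 1023   # register only the final page

    # A 64-byte tensor fits inside the registered page.
    assert register_offset + 64 <= create_byte_size

    # Applying the registration offset twice overshoots the block by ~4 MiB,
    # which is the out-of-bounds access the test is designed to catch.
    assert 2 * register_offset > create_byte_size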
68 changes: 68 additions & 0 deletions qa/L0_shared_memory/test.sh
@@ -25,12 +25,26 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [ "$#" -ge 1 ]; then
+    REPO_VERSION=$1
+fi
+if [ -z "$REPO_VERSION" ]; then
+    echo -e "Repository version must be specified"
+    echo -e "\n***\n*** Test Failed\n***"
+    exit 1
+fi
+if [ ! -z "$TEST_REPO_ARCH" ]; then
+    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
+fi
+
CLIENT_LOG="./client.log"
SHM_TEST=shared_memory_test.py
TEST_RESULT_FILE='test_results.txt'

# Configure to support test on jetson as well
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
+DATADIR=/data/inferenceserver/${REPO_VERSION}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
SERVER_ARGS_EXTRA="--backend-directory=${BACKEND_DIR}"
@@ -142,6 +156,60 @@ for test_case in \
done
done

+# Test large system shared memory offset
+rm -rf models/*
+# prepare add_sub model of various backends
+BACKENDS="python onnx libtorch plan openvino"
+for backend in ${BACKENDS} ; do
+    model="${backend}_int32_int32_int32"
+    model_dir="models/${model}"
+    if [[ $backend == "python" ]]; then
+        mkdir -p ${model_dir}/1
+        cp ../python_models/add_sub/model.py ${model_dir}/1/
+        cp ../python_models/add_sub/config.pbtxt ${model_dir}/
+        sed -i 's/TYPE_FP32/TYPE_INT32/g' ${model_dir}/config.pbtxt
+        echo "max_batch_size: 8" >> ${model_dir}/config.pbtxt
+    else
+        mkdir -p ${model_dir}
+        cp -r $DATADIR/qa_model_repository/${model}/1 ${model_dir}/1
+        cp $DATADIR/qa_model_repository/${model}/config.pbtxt ${model_dir}/
+        cp $DATADIR/qa_model_repository/${model}/output0_labels.txt ${model_dir}/
+        if [ $backend == "openvino" ]; then
+            echo 'parameters { key: "ENABLE_BATCH_PADDING" value { string_value: "YES" } }' >> models/${model}/config.pbtxt
+        fi
+    fi
+done
+
+test_case="test_large_shm_register_offset"
+for client_type in http grpc; do
+    SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 ${SERVER_ARGS_EXTRA}"
+    SERVER_LOG="./${test_case}.${client_type}.server.log"
+    run_server
+    if [ "$SERVER_PID" == "0" ]; then
+        echo -e "\n***\n*** Failed to start $SERVER\n***"
+        cat $SERVER_LOG
+        exit 1
+    fi
+
+    export CLIENT_TYPE=$client_type
+    CLIENT_LOG="./${test_case}.${client_type}.client.log"
+    set +e
+    python3 $SHM_TEST SharedMemoryTest.${test_case} >>"$CLIENT_LOG" 2>&1
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Failed - ${client_type}\n***"
+        RET=1
+    fi
+
+    kill $SERVER_PID
+    wait $SERVER_PID
+    if [ $? -ne 0 ]; then
+        echo -e "\n***\n*** Test Server shut down non-gracefully\n***"
+        RET=1
+    fi
+    set -e
+done
+
if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
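The loop drives the same unittest case once per protocol by exporting CLIENT_TYPE before invoking the test. Presumably the test's setup translates that variable into the client protocol along these lines (a sketch of the assumed harness behavior, not code from this PR):

    import os

    # Assumed mapping from the CLIENT_TYPE environment variable to the
    # protocol and endpoint used by SharedMemoryTest (hypothetical).
    client_type = os.environ.get("CLIENT_TYPE", "http")
    protocol = "grpc" if client_type == "grpc" else "http"
    url = "localhost:8001" if protocol == "grpc" else "localhost:8000"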
39 changes: 30 additions & 9 deletions qa/common/infer_util.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

-# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -1367,11 +1367,13 @@ def shm_basic_infer(
big_shm_name="",
big_shm_size=64,
default_shm_byte_size=64,
+    register_offset=0,
shm_output_offset=0,
shm_output_byte_size=64,
protocol="http",
use_system_shared_memory=False,
use_cuda_shared_memory=False,
+    override_model_name=None,
):
# Lazy shm imports...
if use_system_shared_memory:
@@ -1381,20 +1383,34 @@
else:
raise Exception("No shared memory type specified")

+    if override_model_name is None:
+        model_name = "simple"
+    else:
+        model_name = override_model_name
+
+    if model_name.startswith("libtorch"):
+        output_names = ["OUTPUT__0", "OUTPUT__1"]
+    else:
+        output_names = ["OUTPUT0", "OUTPUT1"]
+
input0_data = np.arange(start=0, stop=16, dtype=np.int32)
input1_data = np.ones(shape=16, dtype=np.int32)
inputs = []
outputs = []
if protocol == "http":
inputs.append(httpclient.InferInput("INPUT0", [1, 16], "INT32"))
inputs.append(httpclient.InferInput("INPUT1", [1, 16], "INT32"))
-        outputs.append(httpclient.InferRequestedOutput("OUTPUT0", binary_data=True))
-        outputs.append(httpclient.InferRequestedOutput("OUTPUT1", binary_data=False))
+        outputs.append(
+            httpclient.InferRequestedOutput(output_names[0], binary_data=True)
+        )
+        outputs.append(
+            httpclient.InferRequestedOutput(output_names[1], binary_data=False)
+        )
else:
inputs.append(grpcclient.InferInput("INPUT0", [1, 16], "INT32"))
inputs.append(grpcclient.InferInput("INPUT1", [1, 16], "INT32"))
-        outputs.append(grpcclient.InferRequestedOutput("OUTPUT0"))
-        outputs.append(grpcclient.InferRequestedOutput("OUTPUT1"))
+        outputs.append(grpcclient.InferRequestedOutput(output_names[0]))
+        outputs.append(grpcclient.InferRequestedOutput(output_names[1]))

inputs[0].set_shared_memory("input0_data", default_shm_byte_size)

@@ -1414,9 +1430,9 @@

try:
        results = triton_client.infer(
-            "simple", inputs, model_version="", outputs=outputs
+            model_name, inputs, model_version="", outputs=outputs
        )
-        output = results.get_output("OUTPUT0")
+        output = results.get_output(output_names[0])
if protocol == "http":
output_datatype = output["datatype"]
output_shape = output["shape"]
@@ -1427,11 +1443,16 @@

if use_system_shared_memory:
output_data = shm.get_contents_as_numpy(
-                shm_op0_handle, output_dtype, output_shape
+                shm_op0_handle,
+                output_dtype,
+                output_shape,
+                offset=register_offset + shm_output_offset,
)
elif use_cuda_shared_memory:
output_data = cudashm.get_contents_as_numpy(
-                shm_op0_handle, output_dtype, output_shape
+                shm_op0_handle,
+                output_dtype,
+                output_shape,
)

tester.assertTrue(
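Reading results back mirrors the write path: the output tensor sits register_offset bytes into the created block, plus any per-output offset within the registered window, which is why shm.get_contents_as_numpy now receives offset=register_offset + shm_output_offset. A stdlib-only sketch of the equivalent read, with a hypothetical /dev/shm path:

    import mmap
    import os

    import numpy as np

    page_size = os.sysconf("SC_PAGE_SIZE")
    register_offset = page_size * 1023
    shm_output_offset = 0

    fd = os.open("/dev/shm/output0_data", os.O_RDWR)
    region = mmap.mmap(fd, page_size * 1024)

    # Skip both offsets, then reinterpret the next 64 bytes as a 1x16 int32 tensor.
    start = register_offset + shm_output_offset
    output_data = np.frombuffer(region[start : start + 64], dtype=np.int32).reshape(1, 16)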
7 changes: 1 addition & 6 deletions src/shared_memory_manager.cc
@@ -524,12 +524,7 @@ SharedMemoryManager::GetMemoryInfo(
*shm_info = std::static_pointer_cast<const SharedMemoryInfo>(it->second);
}

-  if (it->second->kind_ == TRITONSERVER_MEMORY_CPU) {
-    *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ +
-                               it->second->offset_ + offset);
-  } else {
-    *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + offset);
-  }
+  *shm_mapped_addr = (void*)((uint8_t*)it->second->mapped_addr_ + offset);

*memory_type = it->second->kind_;
*device_id = it->second->device_id_;
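This is the server-side fix. The removed branch added the stored registration offset_ to mapped_addr_ for CPU regions, but mapped_addr_ evidently already points at the registered offset within the shm block, so CPU accesses were shifted offset_ bytes too far: the out-of-bounds read the new test reproduces. A Python model of the address computation (illustrative only, not the actual implementation):

    def shm_address(mapped_addr: int, request_offset: int) -> int:
        # Fixed behavior for every memory kind: mapped_addr already
        # accounts for the registration offset.
        return mapped_addr + request_offset

    def shm_address_old_cpu(mapped_addr: int, registration_offset: int, request_offset: int) -> int:
        # Old CPU-only branch: the registration offset was applied a
        # second time, pushing reads past the end of the mapping.
        return mapped_addr + registration_offset + request_offset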