diff --git a/examples/cpp/meson.build b/examples/cpp/meson.build index 89e5ced60..66b7ff81a 100644 --- a/examples/cpp/meson.build +++ b/examples/cpp/meson.build @@ -15,7 +15,7 @@ nixl_example = executable('nixl_example', 'nixl_example.cpp', - dependencies: [nixl_dep, nixl_infra, nixl_common_deps], + dependencies: [nixl_dep, nixl_infra, nixl_common_deps, nixl_test_utils_dep], include_directories: [nixl_inc_dirs, utils_inc_dirs], link_with: [serdes_lib], install: true) @@ -23,7 +23,7 @@ nixl_example = executable('nixl_example', if etcd_dep.found() etcd_example = executable('nixl_etcd_example', 'nixl_etcd_example.cpp', - dependencies: [nixl_dep, nixl_infra, nixl_common_deps], + dependencies: [nixl_dep, nixl_infra, nixl_common_deps, nixl_test_utils_dep], include_directories: [nixl_inc_dirs, utils_inc_dirs], link_with: [serdes_lib], install: true) diff --git a/examples/cpp/nixl_etcd_example.cpp b/examples/cpp/nixl_etcd_example.cpp index 1403a30c7..97ac4f6bd 100644 --- a/examples/cpp/nixl_etcd_example.cpp +++ b/examples/cpp/nixl_etcd_example.cpp @@ -15,12 +15,13 @@ * limitations under the License. 
*/ #include -#include #include #include #include #include "nixl.h" +#include "test_utils.h" + // Change these values to match your etcd setup const std::string ETCD_ENDPOINT = "http://localhost:2379"; @@ -130,7 +131,7 @@ int main() { std::vector plugins; ret1 = A1.getAvailPlugins(plugins); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get available plugins", AGENT1_NAME); std::cout << "Available plugins:\n"; @@ -140,8 +141,8 @@ int main() { ret1 = A1.getPluginParams("UCX", mems1, init1); ret2 = A2.getPluginParams("UCX", mems2, init2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get plugin params for UCX", AGENT1_NAME); + nixl_exit_on_failure(ret2, "Failed to get plugin params for UCX", AGENT2_NAME); std::cout << "Params before init:\n"; printParams(init1, mems1); @@ -151,15 +152,15 @@ int main() { nixlBackendH* ucx1, *ucx2; ret1 = A1.createBackend("UCX", init1, ucx1); ret2 = A2.createBackend("UCX", init2, ucx2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to create UCX backend", AGENT1_NAME); + nixl_exit_on_failure(ret2, "Failed to create UCX backend", AGENT2_NAME); ret1 = A1.getBackendParams(ucx1, mems1, init1); ret2 = A2.getBackendParams(ucx2, mems2, init2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get UCX backend params", AGENT1_NAME); + nixl_exit_on_failure(ret2, "Failed to get UCX backend params", AGENT2_NAME); + std::cout << "Params after init:\n"; printParams(init1, mems1); @@ -167,9 +168,9 @@ int main() { // Register memory with both agents status = registerMemory(&addr1, &A1, &dlist1, &extra_params1, ucx1, 0xaa); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to register memory", AGENT1_NAME); status = registerMemory(&addr2, &A2, &dlist2, &extra_params2, ucx2, 0xbb); - assert(status == NIXL_SUCCESS); + 
nixl_exit_on_failure(status, "Failed to register memory", AGENT2_NAME); std::cout << "\nEtcd Metadata Exchange Demo\n"; std::cout << "==========================\n"; @@ -179,10 +180,10 @@ int main() { // Both agents send their metadata to etcd status = A1.sendLocalMD(); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to send local MD", AGENT1_NAME); status = A2.sendLocalMD(); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to send local MD", AGENT2_NAME); // Give etcd time to process std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -192,11 +193,11 @@ int main() { // Agent1 fetches metadata for Agent2 status = A1.fetchRemoteMD(AGENT2_NAME); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to fetch remote MD", AGENT1_NAME); // Agent2 fetches metadata for Agent1 status = A2.fetchRemoteMD(AGENT1_NAME); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to fetch remote MD", AGENT2_NAME); // Do transfer from Agent 1 to Agent 2 size_t req_size = 8; @@ -229,8 +230,10 @@ int main() { extra_params1.hasNotif = true; ret1 = A1.createXferReq(NIXL_WRITE, req_src_descs, req_dst_descs, AGENT2_NAME, req_handle, &extra_params1); std::cout << "Xfer request created, status: " << nixlEnumStrings::statusStr(ret1) << std::endl; + nixl_exit_on_failure(ret1, "Failed to create Xfer Req", AGENT1_NAME); status = A1.postXferReq(req_handle); + nixl_exit_on_failure((status >= NIXL_SUCCESS), "Failed to post Xfer Req", AGENT1_NAME); std::cout << "Transfer was posted\n"; @@ -240,20 +243,20 @@ int main() { while (status != NIXL_SUCCESS || n_notifs == 0) { if (status != NIXL_SUCCESS) status = A1.getXferStatus(req_handle); if (n_notifs == 0) ret2 = A2.getNotifs(notif_map); - assert (status >= 0); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure((status >= NIXL_SUCCESS), "Failed to get Xfer status", AGENT1_NAME); + nixl_exit_on_failure(ret2, "Failed to get notifs", AGENT2_NAME); n_notifs = 
notif_map.size(); } std::cout << "Transfer verified\n"; ret1 = A1.releaseXferReq(req_handle); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to release Xfer Req", AGENT1_NAME); ret1 = A1.deregisterMem(dlist1, &extra_params1); ret2 = A2.deregisterMem(dlist2, &extra_params2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to deregister memory", AGENT1_NAME); + nixl_exit_on_failure(ret2, "Failed to deregister memory", AGENT2_NAME); // 3. Partial Metadata Exchange std::cout << "\n3. Sending partial metadata to etcd...\n"; @@ -274,36 +277,36 @@ int main() { // Send partial metadata status = A1.sendLocalPartialMD(empty_dlist1, &conn_params1); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to send local partial MD", AGENT1_NAME); status = A2.sendLocalPartialMD(empty_dlist2, &conn_params2); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to send local partial MD", AGENT2_NAME); // Send once partial with different label conn_params1.metadataLabel = PARTIAL_LABEL_2; status = A1.sendLocalPartialMD(empty_dlist1, &conn_params1); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to send local partial MD", AGENT1_NAME); conn_params2.metadataLabel = PARTIAL_LABEL_2; status = A2.sendLocalPartialMD(empty_dlist2, &conn_params2); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to send local partial MD", AGENT2_NAME); nixl_opt_args_t fetch_params; fetch_params.metadataLabel = PARTIAL_LABEL_1; status = A1.fetchRemoteMD(AGENT2_NAME, &fetch_params); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to fetch remote MD", AGENT1_NAME); status = A2.fetchRemoteMD(AGENT1_NAME, &fetch_params); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to fetch remote MD", AGENT2_NAME); std::this_thread::sleep_for(std::chrono::seconds(1)); // 4. Invalidate Metadata std::cout << "\n4. 
Invalidating metadata in etcd...\n"; - // Invalidate agent1's metadata + // Invalidate AGENT1_NAME's metadata status = A1.invalidateLocalMD(); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to invalidate local MD", AGENT1_NAME); std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -316,14 +319,14 @@ int main() { // Try invalidating again, this should log a debug message std::cout << "Trying to invalidate again...\n"; status = A1.invalidateLocalMD(); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to invalidate local MD", AGENT1_NAME); std::this_thread::sleep_for(std::chrono::seconds(1)); // 5. Fetch metadata with invalid label. This should not block forever and print error message. std::cout << "\n5. Fetching metadata with invalid label...\n"; status = A2.fetchRemoteMD("INVALID_AGENT", &fetch_params); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to fetch remote MD", AGENT2_NAME); std::this_thread::sleep_for(std::chrono::seconds(1)); diff --git a/examples/cpp/nixl_example.cpp b/examples/cpp/nixl_example.cpp index e7eaf1073..03e8b8833 100644 --- a/examples/cpp/nixl_example.cpp +++ b/examples/cpp/nixl_example.cpp @@ -21,6 +21,8 @@ #include #include "nixl.h" +#include "test_utils.h" + std::string agent1("Agent001"); std::string agent2("Agent002"); @@ -29,7 +31,7 @@ void check_buf(void* buf, size_t len) { // Do some checks on the data. 
for(size_t i = 0; i plugins; ret1 = A1.getAvailPlugins(plugins); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get available plugins", agent1); std::cout << "Available plugins:\n"; @@ -100,8 +102,8 @@ main(int argc, char **argv) { ret1 = A1.getPluginParams(backend, mems1, init1); ret2 = A2.getPluginParams(backend, mems2, init2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get plugin params", agent1); + nixl_exit_on_failure(ret2, "Failed to get plugin params", agent2); std::cout << "Params before init:\n"; printParams(init1, mems1); @@ -115,14 +117,14 @@ main(int argc, char **argv) { extra_params1.backends.push_back(bknd1); extra_params2.backends.push_back(bknd2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to create " + backend + " backend", agent1); + nixl_exit_on_failure(ret2, "Failed to create " + backend + " backend", agent2); ret1 = A1.getBackendParams(bknd1, mems1, init1); ret2 = A2.getBackendParams(bknd2, mems2, init2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get " + backend + " backend params", agent1); + nixl_exit_on_failure(ret2, "Failed to get " + backend + " backend params", agent2); std::cout << "Params after init:\n"; printParams(init1, mems1); @@ -161,25 +163,22 @@ main(int argc, char **argv) { ret1 = A1.registerMem(dlist1, &extra_params1); ret2 = A2.registerMem(dlist2, &extra_params2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to register memory", agent1); + nixl_exit_on_failure(ret2, "Failed to register memory", agent2); std::string meta1; ret1 = A1.getLocalMD(meta1); std::string meta2; ret2 = A2.getLocalMD(meta2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get local MD", agent1); + nixl_exit_on_failure(ret2, 
"Failed to get local MD", agent2); std::cout << "Agent1's Metadata: " << meta1 << "\n"; std::cout << "Agent2's Metadata: " << meta2 << "\n"; ret1 = A1.loadRemoteMD (meta2, ret_s1); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to load remote MD", agent1); size_t req_size = 8; size_t dst_offset = 8; @@ -204,9 +203,10 @@ main(int argc, char **argv) { extra_params1.notifMsg = "notification"; extra_params1.hasNotif = true; ret1 = A1.createXferReq(NIXL_WRITE, req_src_descs, req_dst_descs, agent2, req_handle, &extra_params1); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to create Xfer Req", agent1); nixl_status_t status = A1.postXferReq(req_handle); + nixl_exit_on_failure((status >= NIXL_SUCCESS), "Failed to post Xfer Req", agent1); std::cout << "Transfer was posted\n"; @@ -216,14 +216,16 @@ main(int argc, char **argv) { while (status != NIXL_SUCCESS || n_notifs == 0) { if (status != NIXL_SUCCESS) status = A1.getXferStatus(req_handle); if (n_notifs == 0) ret2 = A2.getNotifs(notif_map); - assert (status >= 0); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure((status >= NIXL_SUCCESS), "Failed to get Xfer status", agent1); + nixl_exit_on_failure(ret2, "Failed to get notifs", agent2); n_notifs = notif_map.size(); } std::vector agent1_notifs = notif_map[agent1]; - assert (agent1_notifs.size() == 1); - assert (agent1_notifs.front() == "notification"); + nixl_exit_on_failure((agent1_notifs.size() == 1), "Incorrect notif size", agent1); + nixl_exit_on_failure( + (agent1_notifs.front() == "notification"), "Incorrect notification", agent1); + notif_map[agent1].clear(); // Redundant, for testing notif_map.clear(); n_notifs = 0; @@ -231,16 +233,16 @@ main(int argc, char **argv) { std::cout << "Transfer verified\n"; ret1 = A1.releaseXferReq(req_handle); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to release Xfer Req", agent1); ret1 = A1.deregisterMem(dlist1, 
&extra_params1); ret2 = A2.deregisterMem(dlist2, &extra_params2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to deregister memory", agent1); + nixl_exit_on_failure(ret2, "Failed to deregister memory", agent2); //only initiator should call invalidate ret1 = A1.invalidateRemoteMD(agent2); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to invalidate remote MD", agent1); free(addr1); free(addr2); diff --git a/meson.build b/meson.build index 681be08a9..c2136bb1e 100644 --- a/meson.build +++ b/meson.build @@ -235,10 +235,11 @@ plugins_inc_dirs = include_directories('src/plugins') utils_inc_dirs = include_directories('src/utils') subdir('src') - -if get_option('buildtype') != 'release' - subdir('test') - subdir('examples') +if get_option('build_tests') + subdir('test') +endif +if get_option('build_examples') + subdir('examples') endif if get_option('install_headers') diff --git a/meson_options.txt b/meson_options.txt index a316184f8..fc28c08f2 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -30,4 +30,6 @@ option('log_level', type: 'combo', choices: ['trace', 'debug', 'info', 'warning' option('rust', type: 'boolean', value: false, description: 'Build Rust bindings') # Tests +option('build_tests', type: 'boolean', value: true, description: 'Build all tests') +option('build_examples', type: 'boolean', value: true, description: 'Build all examples') option('test_all_plugins', type: 'boolean', value: false, description: 'Testing all plugins in addition to the mocks..') diff --git a/src/infra/meson.build b/src/infra/meson.build index 6a2b4352c..ec33e6959 100644 --- a/src/infra/meson.build +++ b/src/infra/meson.build @@ -21,3 +21,6 @@ nixl_build_lib = library('nixl_build', install: true) nixl_infra = declare_dependency(link_with: nixl_build_lib) + +# Test utilities library that can depend on nixl_dep (created after nixl_dep is defined) +# This will be defined in a separate meson file to 
avoid circular dependencies diff --git a/src/infra/test_utils.cpp b/src/infra/test_utils.cpp new file mode 100644 index 000000000..f0b014f83 --- /dev/null +++ b/src/infra/test_utils.cpp @@ -0,0 +1,37 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "test_utils.h" +#include "nixl_types.h" +#include "common/nixl_log.h" +#include + +void +nixl_exit_on_failure(nixl_status_t status, std::string_view message, std::string_view agent) { + if (status == NIXL_SUCCESS) return; + + NIXL_ERROR << message << (agent.empty() ? "" : " for agent " + std::string{agent}) << ": " + << nixlEnumStrings::statusStr(status) << " [" << status << "]"; + exit(EXIT_FAILURE); +} + +void +nixl_exit_on_failure(bool condition, std::string_view message, std::string_view agent) { + if (condition) return; + + NIXL_ERROR << message << (agent.empty() ? "" : " for agent " + std::string{agent}); + exit(EXIT_FAILURE); +} diff --git a/src/infra/test_utils.h b/src/infra/test_utils.h new file mode 100644 index 000000000..92e77175c --- /dev/null +++ b/src/infra/test_utils.h @@ -0,0 +1,49 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef NIXL_TEST_UTILS_H +#define NIXL_TEST_UTILS_H + +#include +#include "nixl_types.h" + +/** + * @brief Exit on failure utility functions for tests and examples + * + * These functions provide a convenient way to check conditions and exit + * with appropriate error messages if they fail. They are designed for + * use in tests and examples where immediate termination on error is desired. 
+ */ + +/** + * @brief Exit if nixl_status_t indicates failure + * @param status The nixl status to check + * @param message Error message to display + * @param agent Optional agent name for context + */ +void +nixl_exit_on_failure(nixl_status_t status, std::string_view message, std::string_view agent = {}); + +/** + * @brief Exit if boolean condition is false + * @param condition The condition to check (exits if false) + * @param message Error message to display + * @param agent Optional agent name for context + */ +void +nixl_exit_on_failure(bool condition, std::string_view message, std::string_view agent = {}); + +#endif /* NIXL_TEST_UTILS_H */ diff --git a/src/meson.build b/src/meson.build index 50a6fddc8..9cc396543 100644 --- a/src/meson.build +++ b/src/meson.build @@ -21,4 +21,18 @@ subdir('utils') subdir('infra') subdir('plugins') subdir('core') + +# Test utilities library - created after nixl_dep is available to avoid circular dependencies +nixl_test_utils_lib = library('nixl_test_utils', + 'infra/test_utils.cpp', + include_directories: [ nixl_inc_dirs, utils_inc_dirs ], + dependencies: [nixl_dep, nixl_common_dep, absl_log_dep], + install: true) + +nixl_test_utils_dep = declare_dependency( + include_directories: include_directories('infra'), + link_with: nixl_test_utils_lib, + dependencies: [nixl_dep, nixl_common_dep] +) + subdir('bindings') diff --git a/test/nixl/agent_example.cpp b/test/nixl/agent_example.cpp index 5c4c0d206..f02450533 100644 --- a/test/nixl/agent_example.cpp +++ b/test/nixl/agent_example.cpp @@ -15,12 +15,12 @@ * limitations under the License. */ #include -#include #include #include #include "nixl.h" +#include "test_utils.h" std::string agent1("Agent001"); std::string agent2("Agent002"); @@ -29,7 +29,7 @@ void check_buf(void* buf, size_t len) { // Do some checks on the data. 
for(size_t i = 0; iregisterMem(mem_list1, &extra_params1); - assert (status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to register memory", agent1); status = A2->registerMem(mem_list2, &extra_params2); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to register memory", agent2); std::string meta2; status = A2->getLocalMD(meta2); - assert (status == NIXL_SUCCESS); - assert (meta2.size() > 0); + nixl_exit_on_failure(status, "Failed to get local MD", agent2); + nixl_exit_on_failure((meta2.size() > 0), "Incorrect local MD", agent2); + std::string remote_name; status = A1->loadRemoteMD(meta2, remote_name); - assert (status == NIXL_SUCCESS); - assert (remote_name == agent2); + + nixl_exit_on_failure(status, "Failed to load remote MD", agent1); + nixl_exit_on_failure((remote_name == agent2), "Incorrect remote MD received", agent1); std::cout << "perf setup done\n"; @@ -109,10 +116,9 @@ void test_side_perf(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend, nixlBac for(int i = 0; iprepXferDlist(agent2, dst_list, dst_side[i], &extra_params1); - assert (status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to prep Xfer Dlist for dest", agent1); status = A1->prepXferDlist(NIXL_INIT_AGENT, src_list, src_side[i], &extra_params1); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to prep Xfer Dlist for src", agent1); } gettimeofday(&end_time, NULL); @@ -137,7 +143,7 @@ void test_side_perf(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend, nixlBac extra_params1.notifMsg = "test"; extra_params1.hasNotif = true; status = A1->makeXferReq(NIXL_WRITE, src_side[0], indices, dst_side[0], indices, reqh1, &extra_params1); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to make Xfer Req", agent1); indices.clear(); for(int i = 0; i<(n_mems*descs_per_mem); i+=2) @@ -145,24 +151,24 @@ void test_side_perf(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend, nixlBac //should print 
(n_mems*descs_per_mem/2) number of final descriptors status = A1->makeXferReq(NIXL_WRITE, src_side[0], indices, dst_side[0], indices, reqh2, &extra_params1); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to make Xfer Req", agent1); status = A1->releaseXferReq(reqh1); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release Xfer Req", agent1); status = A1->releaseXferReq(reqh2); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release Xfer Req2", agent1); // Commented out to test auto deregistration // status = A1->deregisterMem(mem_list1, &extra_params1); // assert (status == NIXL_SUCCESS); status = A2->deregisterMem(mem_list2, &extra_params2); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to deregister memory", agent2); for(int i = 0; ireleasedDlistH(src_side[i]); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release src Dlist handle", agent1); status = A1->releasedDlistH(dst_side[i]); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release dst Dlist handle", agent1); } free(src_buf); @@ -209,10 +215,9 @@ nixl_status_t partialMdTest(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend1 // Register memory for each update for (int update = 0; update < NUM_UPDATES; update++) { status = A1->registerMem(src_mem_lists[update], &extra_params1); - assert(status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to register memory", agent1); status = A2->registerMem(dst_mem_lists[update], &extra_params2); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to register memory", agent2); } // Test metadata update with only backends and empty descriptor list @@ -220,30 +225,32 @@ nixl_status_t partialMdTest(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend1 // Agent2 might have already been previously loaded. // Invalidate it just in case but don't care either way. 
- A1->invalidateRemoteMD(agent2); + // Best-effort: agent2 may not be loaded at this point, so the result is intentionally ignored. + (void)A1->invalidateRemoteMD(agent2); nixl_reg_dlist_t empty_dlist(DRAM_SEG); std::string partial_meta; status = A2->getLocalPartialMD(empty_dlist, partial_meta, NULL); - assert(status == NIXL_SUCCESS); - assert(partial_meta.size() > 0); + nixl_exit_on_failure(status, "Failed to get local partial MD", agent2); + nixl_exit_on_failure((partial_meta.size() > 0), "Incorrect local partial MD", agent2); std::string remote_name; status = A1->loadRemoteMD(partial_meta, remote_name); - assert(status == NIXL_SUCCESS); - assert(remote_name == agent2); + nixl_exit_on_failure(status, "Failed to load remote MD", agent1); + nixl_exit_on_failure((remote_name == agent2), "Incorrect remote MD", agent1); // Make sure unregistered descriptors are not updated for (int update = 0; update < NUM_UPDATES; update++) { nixlDlistH *dst_side; status = A1->prepXferDlist(agent2, dst_mem_lists[update].trim(), dst_side, &extra_params1); - assert(status != NIXL_SUCCESS); - assert(dst_side == nullptr); + nixl_exit_on_failure( + (status != NIXL_SUCCESS), "Prep xfer dlist should not be successful", agent1); + nixl_exit_on_failure((dst_side == nullptr), "Dst side is not null", agent1); } // Invalidate remote agent metadata to make sure we received connection info status = A1->invalidateRemoteMD(agent2); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to invalidate remote MD", agent1); std::cout << "Metadata update - backends only completed\n"; // Main test loop - update metadata multiple times @@ -256,26 +263,27 @@ nixl_status_t partialMdTest(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend1 std::cout << "Metadata update #" << update << "\n"; // Get partial metadata from A2 status = A2->getLocalPartialMD(dst_mem_lists[update], partial_meta, &extra_params2); - assert(status == NIXL_SUCCESS); - assert(partial_meta.size() > 0); + nixl_exit_on_failure(status, 
"Failed to get local partial MD", agent2); + nixl_exit_on_failure((partial_meta.size() > 0), "Incorrect local partial MD", agent2); // Load the partial metadata into A1 std::string remote_name; status = A1->loadRemoteMD(partial_meta, remote_name); - assert(status == NIXL_SUCCESS); - assert(remote_name == agent2); + nixl_exit_on_failure(status, "Failed to load remote MD", agent1); + nixl_exit_on_failure((remote_name == agent2), "Incorrect remote MD", agent1); // Make sure loaded descriptors are updated nixlDlistH *dst_side; status = A1->prepXferDlist(agent2, dst_mem_lists[update].trim(), dst_side, &extra_params1); - assert(status == NIXL_SUCCESS); - assert(dst_side != nullptr); + nixl_exit_on_failure(status, "Failed to prep xfer dlist", agent1); + nixl_exit_on_failure((dst_side != nullptr), "Dst side is null", agent1); // Make sure not-loaded descriptors are not updated for (int invalid_idx = update + 1; invalid_idx < NUM_UPDATES; invalid_idx++) { status = A1->prepXferDlist(agent2, dst_mem_lists[invalid_idx].trim(), dst_side, &extra_params1); - assert(status != NIXL_SUCCESS); - assert(dst_side == nullptr); + nixl_exit_on_failure( + (status != NIXL_SUCCESS), "Prep xfer dlist should not be successful", agent1); + nixl_exit_on_failure((dst_side == nullptr), "Dst side is not null", agent1); } std::cout << "Metadata update #" << update << " completed\n"; } @@ -297,10 +305,10 @@ nixl_status_t partialMdTest(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend1 nixlDlistH *src_side, *dst_side; status = A1->prepXferDlist(NIXL_INIT_AGENT, src_xfer_list, src_side, &extra_params1); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to prep xfer dlist", agent1); status = A1->prepXferDlist(agent2, dst_xfer_list, dst_side, &extra_params1); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to prep xfer dlist", agent1); std::cout << "Transfer preparation completed\n"; @@ -316,14 +324,13 @@ nixl_status_t partialMdTest(nixlAgent* A1, 
nixlAgent* A2, nixlBackendH* backend1 // Create and post the transfer request status = A1->makeXferReq(NIXL_WRITE, src_side, indices, dst_side, indices, req, &extra_params1); - assert(status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to make xfer req", agent1); nixl_status_t xfer_status = A1->postXferReq(req); // Wait for transfer completion while (xfer_status != NIXL_SUCCESS) { if (xfer_status != NIXL_SUCCESS) xfer_status = A1->getXferStatus(req); - assert (xfer_status >= 0); + nixl_exit_on_failure((xfer_status >= 0), "Failed to get xfer status", agent1); } // Verify transfer results @@ -337,21 +344,17 @@ nixl_status_t partialMdTest(nixlAgent* A1, nixlAgent* A2, nixlBackendH* backend1 // Cleanup status = A1->releaseXferReq(req); - assert(status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to release xfer req", agent1); status = A1->releasedDlistH(src_side); - assert(status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to release xfer dlist", agent1); status = A1->releasedDlistH(dst_side); - assert(status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to release xfer dlist", agent1); // Deregister memory for (int update = 0; update < NUM_UPDATES; update++) { status = A1->deregisterMem(src_mem_lists[update], &extra_params1); - assert(status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to deregister memory", agent1); status = A2->deregisterMem(dst_mem_lists[update], &extra_params2); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to deregister memory", agent2); } // Free allocated memory @@ -376,8 +379,8 @@ nixl_status_t sideXferTest(nixlAgent* A1, nixlAgent* A2, nixlXferReqH* src_handl extra_params1.backends.push_back(src_backend); extra_params2.backends.push_back(dst_backend); - assert (status == NIXL_SUCCESS); - assert (src_backend); + nixl_exit_on_failure(status, "Failed to query xfer backend", agent1); + nixl_exit_on_failure((src_backend != nullptr), "Incorrect src 
backend handle", agent1); std::cout << "Got backend\n"; @@ -412,30 +415,26 @@ nixl_status_t sideXferTest(nixlAgent* A1, nixlAgent* A2, nixlXferReqH* src_handl dst_list = mem_list2.trim(); status = A1->registerMem(mem_list1, &extra_params1); - assert (status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to register memory", agent1); status = A2->registerMem(mem_list2, &extra_params2); - assert (status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to register memory", agent2); std::string meta2; status = A2->getLocalMD(meta2); - assert (status == NIXL_SUCCESS); - assert (meta2.size() > 0); - + nixl_exit_on_failure(status, "Failed to get local MD", agent2); + nixl_exit_on_failure((meta2.size() > 0), "Incorrect local MD", agent2); std::string remote_name; status = A1->loadRemoteMD(meta2, remote_name); - assert (status == NIXL_SUCCESS); - assert (remote_name == agent2); - + nixl_exit_on_failure(status, "Failed to load remote MD", agent1); + nixl_exit_on_failure((remote_name == agent2), "Incorrect remote MD", agent1); std::cout << "Ready to prepare side\n"; nixlDlistH *src_side, *dst_side; status = A1->prepXferDlist(NIXL_INIT_AGENT, src_list, src_side, &extra_params1); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to prep xfer dlist", agent1); status = A1->prepXferDlist(remote_name, dst_list, dst_side, &extra_params1); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to prep xfer dlist", agent1); std::cout << "prep done, starting transfers\n"; @@ -453,13 +452,14 @@ nixl_status_t sideXferTest(nixlAgent* A1, nixlAgent* A2, nixlXferReqH* src_handl //write first half of src_bufs to dst_bufs status = A1->makeXferReq(NIXL_WRITE, src_side, indices1, dst_side, indices1, req1, &extra_params1); - assert (status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to make xfer req", agent1); nixl_status_t xfer_status = A1->postXferReq(req1); while (xfer_status != NIXL_SUCCESS) { if (xfer_status != 
NIXL_SUCCESS) xfer_status = A1->getXferStatus(req1); - assert (xfer_status >= 0); + nixl_exit_on_failure((xfer_status == NIXL_SUCCESS || xfer_status == NIXL_IN_PROG), + "Failed to get xfer status", + agent1); } for(int i = 0; i<(n_bufs/2); i++) @@ -469,13 +469,14 @@ nixl_status_t sideXferTest(nixlAgent* A1, nixlAgent* A2, nixlXferReqH* src_handl //read first half of dst_bufs back to second half of src_bufs status = A1->makeXferReq(NIXL_READ, src_side, indices2, dst_side, indices1, req2, &extra_params1); - assert (status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to make xfer req", agent1); xfer_status = A1->postXferReq(req2); while (xfer_status != NIXL_SUCCESS) { if (xfer_status != NIXL_SUCCESS) xfer_status = A1->getXferStatus(req2); - assert (xfer_status >= 0); + nixl_exit_on_failure((xfer_status == NIXL_SUCCESS || xfer_status == NIXL_IN_PROG), + "Failed to get xfer status", + agent1); } for(int i = (n_bufs/2); imakeXferReq(NIXL_WRITE, src_side, indices2, dst_side, indices2, req3, &extra_params1); - assert (status == NIXL_SUCCESS); - + nixl_exit_on_failure(status, "Failed to make xfer req", agent1); xfer_status = A1->postXferReq(req3); while (xfer_status != NIXL_SUCCESS) { if (xfer_status != NIXL_SUCCESS) xfer_status = A1->getXferStatus(req3); - assert (xfer_status >= 0); + nixl_exit_on_failure((xfer_status == NIXL_SUCCESS || xfer_status == NIXL_IN_PROG), + "Failed to get xfer status", + agent1); } for(int i = (n_bufs/2); ireleaseXferReq(req1); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release xfer req", agent1); status = A1->releaseXferReq(req2); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release xfer req2", agent1); status = A1->releaseXferReq(req3); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release xfer req3", agent1); // Commented out to test auto deregistration // status = A1->deregisterMem(mem_list1, &extra_params1); @@ -513,9 +515,9 @@ 
nixl_status_t sideXferTest(nixlAgent* A1, nixlAgent* A2, nixlXferReqH* src_handl // assert (status == NIXL_SUCCESS); status = A1->releasedDlistH(src_side); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release xfer src dlist", agent1); status = A1->releasedDlistH(dst_side); - assert (status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to release xfer dst dlist", agent1); for(int i = 0; i plugins; ret1 = A1.getAvailPlugins(plugins); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get available plugins", agent1); std::cout << "Available plugins:\n"; for (nixl_backend_t b: plugins) std::cout << b << "\n"; - std::cout << "Using backend: " << backend << "\n"; - ret1 = A1.getPluginParams(backend, mems1, init1); - ret2 = A2.getPluginParams(backend, mems2, init2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + ret1 = A1.getPluginParams("UCX", mems1, init1); + ret2 = A2.getPluginParams("UCX", mems2, init2); + nixl_exit_on_failure(ret1, "Failed to get plugin params for UCX", agent1); + nixl_exit_on_failure(ret2, "Failed to get plugin params for UCX", agent2); std::cout << "Params before init:\n"; printParams(init1, mems1); @@ -598,14 +598,13 @@ main(int argc, char **argv) { extra_params1.backends.push_back(bknd1); extra_params2.backends.push_back(bknd2); - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to create " + backend + " backend", agent1); + nixl_exit_on_failure(ret2, "Failed to create " + backend + " backend", agent2); ret1 = A1.getBackendParams(bknd1, mems1, init1); ret2 = A2.getBackendParams(bknd2, mems2, init2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get " + backend + " backend params", agent1); + nixl_exit_on_failure(ret2, "Failed to get " + backend + " backend params", agent2); std::cout << "Params after init:\n"; printParams(init1, mems1); @@ -615,9 
+614,6 @@ main(int argc, char **argv) { // ret1 = A1->makeConnection(agent2, 0); // ret2 = A2->makeConnection(agent1, 1); - // assert (ret1 == NIXL_SUCCESS); - // assert (ret2 == NIXL_SUCCESS); - // User allocates memories, and passes the corresponding address // and length to register with the backend nixlBlobDesc buff1, buff2, buff3; @@ -650,26 +646,23 @@ main(int argc, char **argv) { ret1 = A1.registerMem(dlist1, &extra_params1); ret2 = A2.registerMem(dlist2, &extra_params2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to register memory", agent1); + nixl_exit_on_failure(ret2, "Failed to register memory", agent2); std::string meta1; ret1 = A1.getLocalMD(meta1); std::string meta2; ret2 = A2.getLocalMD(meta2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to get local MD", agent1); + nixl_exit_on_failure(ret2, "Failed to get local MD", agent2); std::cout << "Agent1's Metadata: " << meta1 << "\n"; std::cout << "Agent2's Metadata: " << meta2 << "\n"; ret1 = A1.loadRemoteMD (meta2, ret_s1); ret2 = A2.loadRemoteMD (meta1, ret_s2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to load remote MD", agent1); + nixl_exit_on_failure(ret2, "Failed to load remote MD", agent2); size_t req_size = 8; size_t dst_offset = 8; @@ -701,7 +694,7 @@ main(int argc, char **argv) { extra_params1.notifMsg = "notification"; extra_params1.hasNotif = true; ret1 = A1.createXferReq(NIXL_WRITE, req_src_descs, req_dst_descs, agent2, req_handle, &extra_params1); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to create Xfer Req", agent1); nixl_status_t status = A1.postXferReq(req_handle); @@ -713,14 +706,17 @@ main(int argc, char **argv) { while (status != NIXL_SUCCESS || n_notifs == 0) { if (status != NIXL_SUCCESS) status = A1.getXferStatus(req_handle); if (n_notifs == 0) ret2 = 
A2.getNotifs(notif_map); - assert (status >= 0); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure( + (status == NIXL_SUCCESS || status == NIXL_IN_PROG), "Failed to post Xfer Req", agent1); + nixl_exit_on_failure(ret2, "Failed to get notifs", agent2); n_notifs = notif_map.size(); } std::vector agent1_notifs = notif_map[agent1]; - assert (agent1_notifs.size() == 1); - assert (agent1_notifs.front() == "notification"); + + nixl_exit_on_failure((agent1_notifs.size() == 1), "Incorrect notif size", agent1); + nixl_exit_on_failure( + (agent1_notifs.front() == "notification"), "Incorrect notification", agent1); notif_map[agent1].clear(); // Redundant, for testing notif_map.clear(); n_notifs = 0; @@ -729,17 +725,17 @@ main(int argc, char **argv) { std::cout << "performing partialMdTest with backends " << bknd1 << " " << bknd2 << "\n"; ret1 = partialMdTest(&A1, &A2, bknd1, bknd2); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Fail to run partialMDTest", agent1); std::cout << "performing sideXferTest with backends " << bknd1 << " " << bknd2 << "\n"; ret1 = sideXferTest(&A1, &A2, req_handle, bknd2); - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Fail to run sideXferTest", agent1); std::cout << "Performing local test\n"; extra_params1.notifMsg = "local_notif"; extra_params1.hasNotif = true; ret2 = A1.createXferReq(NIXL_WRITE, req_src_descs, req_ldst_descs, agent1, req_handle2, &extra_params1); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to create Xfer Req", agent1); status = A1.postXferReq(req_handle2); std::cout << "Local transfer was posted\n"; @@ -747,33 +743,34 @@ main(int argc, char **argv) { while (status != NIXL_SUCCESS || n_notifs == 0) { if (status != NIXL_SUCCESS) status = A1.getXferStatus(req_handle2); if (n_notifs == 0) ret2 = A1.getNotifs(notif_map); - assert (status >= 0); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure( + (status == NIXL_SUCCESS || status == NIXL_IN_PROG), "Failed to post 
Xfer Req", agent1); + nixl_exit_on_failure(ret2, "Failed to get notifs", agent2); n_notifs = notif_map.size(); } agent1_notifs = notif_map[agent1]; - assert (agent1_notifs.size() == 1); - assert (agent1_notifs.front() == "local_notif"); - assert (equal_buf((void*) req_src.addr, (void*) req_ldst.addr, req_size) == true); + nixl_exit_on_failure((agent1_notifs.size() == 1), "Incorrect notif size", agent1); + nixl_exit_on_failure( + (agent1_notifs.front() == "local_notif"), "Incorrect notification", agent1); + nixl_exit_on_failure((equal_buf((void *)req_src.addr, (void *)req_ldst.addr, req_size) == true), + "Buffer mismatch after transfer", + agent1); ret1 = A1.releaseXferReq(req_handle); ret2 = A1.releaseXferReq(req_handle2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to release Xfer Req", agent1); + nixl_exit_on_failure(ret2, "Failed to release Xfer Req2", agent1); ret1 = A1.deregisterMem(dlist1, &extra_params1); ret2 = A2.deregisterMem(dlist2, &extra_params2); - - assert (ret1 == NIXL_SUCCESS); - assert (ret2 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to deregister memory", agent1); + nixl_exit_on_failure(ret2, "Failed to deregister memory", agent2); //only initiator should call invalidate ret1 = A1.invalidateRemoteMD(agent2); //A2.invalidateRemoteMD(agent1); - - assert (ret1 == NIXL_SUCCESS); + nixl_exit_on_failure(ret1, "Failed to invalidate remote MD", agent1); free(addr1); free(addr2); diff --git a/test/nixl/desc_example.cpp b/test/nixl/desc_example.cpp index 8b4d971fa..d7fe89d69 100644 --- a/test/nixl/desc_example.cpp +++ b/test/nixl/desc_example.cpp @@ -16,10 +16,11 @@ */ #include #include -#include + #include "nixl.h" #include "serdes/serdes.h" #include "backend/backend_aux.h" +#include "test_utils.h" #include @@ -38,7 +39,7 @@ void testPerf(){ gettimeofday(&end_time, NULL); - assert(dlist.descCount() == 24*64*1024); + nixl_exit_on_failure((dlist.descCount() == 24 * 64 * 1024), 
"Incorrect number of descriptors"); timersub(&end_time, &start_time, &diff_time); std::cout << "add desc mode, total time for " << 24*64*1024 << " descs: " << diff_time.tv_sec << "s " << diff_time.tv_usec << "us \n"; @@ -57,7 +58,7 @@ void testPerf(){ gettimeofday(&end_time, NULL); - assert(dlist.descCount() == 24*64*1024); + nixl_exit_on_failure((dlist.descCount() == 24 * 64 * 1024), "Incorrect number of descriptors"); timersub(&end_time, &start_time, &diff_time); std::cout << "Operator [] mode, total time for " << 24*64*1024 << " descs: " << diff_time.tv_sec << "s " << diff_time.tv_usec << "us \n"; @@ -87,27 +88,27 @@ int main() nixlBasicDesc buff8 (1010,31,0); nixlBasicDesc importDesc(buff2.serialize()); - assert(buff2 == importDesc); - - assert (buff3==buff2); - assert (buff4==buff1); - assert (buff3!=buff1); - assert (buff8!=buff7); - - assert (buff2.covers(buff3)); - assert (buff4.overlaps(buff1)); - assert (!buff1.covers(buff2)); - assert (!buff1.overlaps(buff2)); - assert (!buff2.covers(buff1)); - assert (!buff2.overlaps(buff1)); - assert (buff2.overlaps(buff5)); - assert (buff5.overlaps(buff2)); - assert (!buff2.covers(buff5)); - assert (!buff5.covers(buff2)); - assert (!buff1.covers(buff6)); - assert (!buff6.covers(buff1)); - assert (buff1.covers(buff7)); - assert (!buff7.covers(buff1)); + nixl_exit_on_failure((buff2 == importDesc), "Descriptor mismatch for buff2 and importDesc"); + + nixl_exit_on_failure((buff3 == buff2), "Descriptor mismatch for buff3 and buff2"); + nixl_exit_on_failure((buff4 == buff1), "Descriptor mismatch for buff4 and buff1"); + nixl_exit_on_failure((buff3 != buff1), "Descriptor mismatch for buff3 and buff1"); + nixl_exit_on_failure((buff8 != buff7), "Descriptor mismatch for buff8 and buff7"); + + nixl_exit_on_failure((buff2.covers(buff3)), "Descriptor buff2 does not cover buff3"); + nixl_exit_on_failure((buff4.overlaps(buff1)), "Descriptor buff4 does not overlap buff1"); + nixl_exit_on_failure(!(buff1.covers(buff2)), "Descriptor 
buff1 does not cover buff2"); + nixl_exit_on_failure(!(buff1.overlaps(buff2)), "Descriptor buff1 does not overlap buff2"); + nixl_exit_on_failure(!(buff2.covers(buff1)), "Descriptor buff2 does not cover buff1"); + nixl_exit_on_failure(!(buff2.overlaps(buff1)), "Descriptor buff2 does not overlap buff1"); + nixl_exit_on_failure((buff2.overlaps(buff5)), "Descriptor buff2 does not overlap buff5"); + nixl_exit_on_failure((buff5.overlaps(buff2)), "Descriptor buff5 does not overlap buff2"); + nixl_exit_on_failure(!(buff2.covers(buff5)), "Descriptor buff2 does not cover buff5"); + nixl_exit_on_failure(!(buff5.covers(buff2)), "Descriptor buff5 does not cover buff2"); + nixl_exit_on_failure(!(buff1.covers(buff6)), "Descriptor buff1 does not cover buff6"); + nixl_exit_on_failure(!(buff6.covers(buff1)), "Descriptor buff6 does not cover buff1"); + nixl_exit_on_failure((buff1.covers(buff7)), "Descriptor buff1 does not cover buff7"); + nixl_exit_on_failure(!(buff7.covers(buff1)), "Descriptor buff7 does not cover buff1"); nixlBlobDesc stringd1; stringd1.addr = 2392382; @@ -116,7 +117,8 @@ int main() stringd1.metaInfo = std::string("567"); nixlBlobDesc importStringD(stringd1.serialize()); - assert(stringd1 == importStringD); + nixl_exit_on_failure((stringd1 == importStringD), + "Descriptor stringd1 does not match importStringD"); std::cout << "\nSerDes Desc tests:\n"; buff2.print(""); @@ -146,8 +148,8 @@ int main() meta2.devId = 0; meta2.metadataP = nullptr; - assert (stringd1!=buff1); - assert (stringd2==buff8); + nixl_exit_on_failure((stringd1 != buff1), "Descriptor stringd1 matches buff1"); + nixl_exit_on_failure((stringd2 == buff8), "Descriptor stringd2 does not match buff8"); nixlBasicDesc buff9 (stringd1); buff1.print(""); @@ -200,8 +202,10 @@ int main() std::cout << "Caught expected error: " << e.what() << std::endl; } dlist2.remDesc(dlist2.getIndex(meta3)); - assert(dlist2.getIndex(meta3)== NIXL_ERR_NOT_FOUND); - assert(dlist3.getIndex(meta1)== NIXL_ERR_NOT_FOUND); + 
nixl_exit_on_failure((dlist2.getIndex(meta3) == NIXL_ERR_NOT_FOUND), + "Dlist2 descriptor not removed"); + nixl_exit_on_failure((dlist3.getIndex(meta1) == NIXL_ERR_NOT_FOUND), + "Dlist3 descriptor not removed"); try { dlist3.remDesc(dlist3.getIndex(meta4)); } catch (const std::out_of_range& e) { @@ -277,13 +281,13 @@ int main() nixlSerDes* ser_des = new nixlSerDes(); nixlSerDes* ser_des2 = new nixlSerDes(); - assert(dlist10.serialize(ser_des) == 0); + nixl_exit_on_failure((dlist10.serialize(ser_des) == 0), "Failed to serialize dlist10"); nixl_xfer_dlist_t importList (ser_des);; - assert(importList == dlist10); + nixl_exit_on_failure((importList == dlist10), "Descriptor importList does not match dlist10"); - assert(dlist20.serialize(ser_des2) == 0); + nixl_exit_on_failure((dlist20.serialize(ser_des2) == 0), "Failed to serialize dlist20"); nixl_reg_dlist_t importSList (ser_des2); - assert(importSList == dlist20); + nixl_exit_on_failure((importSList == dlist20), "Descriptor importSList does not match dlist20"); dlist10.print(); std::cout << "this should be a copy:\n"; diff --git a/test/nixl/meson.build b/test/nixl/meson.build index bc6a9c9ff..fe99db3a7 100644 --- a/test/nixl/meson.build +++ b/test/nixl/meson.build @@ -15,26 +15,26 @@ desc_example = executable('desc_example', 'desc_example.cpp', - dependencies: [nixl_dep, nixl_infra], + dependencies: [nixl_dep, nixl_infra, nixl_test_utils_dep], include_directories: [nixl_inc_dirs, utils_inc_dirs], link_with: [serdes_lib], install: true) agent_example = executable('agent_example', 'agent_example.cpp', - dependencies: [nixl_dep, nixl_infra], + dependencies: [nixl_dep, nixl_infra, nixl_test_utils_dep], include_directories: [nixl_inc_dirs, utils_inc_dirs], link_with: [serdes_lib], install: true) nixl_test_app = executable('nixl_test', 'nixl_test.cpp', - dependencies: [nixl_dep, nixl_infra, stream_interface, thread_dep], + dependencies: [nixl_dep, nixl_infra, stream_interface, thread_dep, nixl_test_utils_dep], 
include_directories: [nixl_inc_dirs, utils_inc_dirs, '../../src/utils/serdes'], link_with: [serdes_lib], install: true) plugin_test = executable('test_plugin', 'test_plugin.cpp', - dependencies: [nixl_dep, nixl_common_dep, cuda_dep], + dependencies: [nixl_dep, nixl_common_dep, cuda_dep, nixl_test_utils_dep], include_directories: [nixl_inc_dirs, utils_inc_dirs], install: true) diff --git a/test/nixl/nixl_test.cpp b/test/nixl/nixl_test.cpp index 03095c2cf..9880949a1 100644 --- a/test/nixl/nixl_test.cpp +++ b/test/nixl/nixl_test.cpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include "test_utils.h" #include "stream/metadata_stream.h" #include "serdes/serdes.h" #include @@ -83,7 +83,8 @@ static void targetThread(nixlAgent &agent, nixl_opt_args_t *extra_params, int th /** Only send desc list */ nixlSerDes serdes; - assert(dram_for_ucx.trim().serialize(&serdes) == NIXL_SUCCESS); + nixl_status_t st = dram_for_ucx.trim().serialize(&serdes); + nixl_exit_on_failure(st, "Failed to serialize registry dlist"); std::cout << "Thread " << thread_id << " Wait for initiator and then send xfer descs\n"; std::string message = serdes.exportStr(); @@ -118,6 +119,7 @@ static void targetThread(nixlAgent &agent, nixl_opt_args_t *extra_params, int th static void initiatorThread(nixlAgent &agent, nixl_opt_args_t *extra_params, const std::string &target_ip, int target_port, int thread_id, SharedNotificationState &shared_state) { + nixl_status_t st; nixl_reg_dlist_t dram_for_ucx(DRAM_SEG); auto addrs = initMem(agent, dram_for_ucx, extra_params, MEM_VAL); @@ -128,9 +130,11 @@ static void initiatorThread(nixlAgent &agent, nixl_opt_args_t *extra_params, md_extra_params.ipAddr = target_ip; md_extra_params.port = target_port; - agent.fetchRemoteMD(target, &md_extra_params); + st = agent.fetchRemoteMD(target, &md_extra_params); + nixl_exit_on_failure(st, "Failed to fetch remote MD"); - agent.sendLocalMD(&md_extra_params); + st = agent.sendLocalMD(&md_extra_params); + 
nixl_exit_on_failure(st, "Failed to send local MD"); // Wait for notifications and populate shared state while (true) { @@ -143,7 +147,7 @@ static void initiatorThread(nixlAgent &agent, nixl_opt_args_t *extra_params, nixl_notifs_t notifs; nixl_status_t ret = agent.getNotifs(notifs, extra_params); - assert(ret >= 0); + nixl_exit_on_failure(ret, "Failed to get notifs"); if (notifs.size() > 0) { std::lock_guard lock(shared_state.mtx); @@ -193,8 +197,9 @@ static void initiatorThread(nixlAgent &agent, nixl_opt_args_t *extra_params, while (ret != NIXL_SUCCESS) { ret = agent.getXferStatus(treq); - assert(ret >= 0); + nixl_exit_on_failure((ret >= NIXL_SUCCESS), "Failed to get transfer status"); } + std::cout << "Thread " << thread_id << " Completed Sending Data using UCX backend\n"; agent.releaseXferReq(treq); agent.invalidateLocalMD(&md_extra_params); diff --git a/test/unit/plugins/gpunetio/meson.build b/test/unit/plugins/gpunetio/meson.build index b6ad72fdb..960799f8b 100644 --- a/test/unit/plugins/gpunetio/meson.build +++ b/test/unit/plugins/gpunetio/meson.build @@ -19,6 +19,7 @@ if cuda_dep.found() cuda_dependencies = [cuda_dep] compile_flags += '-DHAVE_CUDA' nvtx_dep = nvcc.find_library('nvToolsExt', dirs: '/usr/local/cuda/lib64', required: false) + dl_dep = dependency('dl', required: true) if nvtx_dep.found() compile_flags += '-DUSE_NVTX' else @@ -26,7 +27,7 @@ if cuda_dep.found() endif nixl_gpunetio_stream_app = executable ('nixl_gpunetio_stream_test', 'nixl_gpunetio_stream_test.cu', - dependencies: [nixl_dep, nixl_infra, stream_interface] + cuda_dep + nvtx_dep, + dependencies: [nixl_dep, nixl_infra, stream_interface] + cuda_dep + nvtx_dep + dl_dep + nixl_test_utils_dep, include_directories: [nixl_inc_dirs, utils_inc_dirs, '../../../../src/utils/serdes'], cpp_args: compile_flags, cuda_args: compile_flags, diff --git a/test/unit/plugins/gpunetio/nixl_gpunetio_stream_test.cu b/test/unit/plugins/gpunetio/nixl_gpunetio_stream_test.cu index 2e09e2a98..fc8cc963a 100644 
--- a/test/unit/plugins/gpunetio/nixl_gpunetio_stream_test.cu +++ b/test/unit/plugins/gpunetio/nixl_gpunetio_stream_test.cu @@ -21,7 +21,7 @@ #include #include #include -#include +#include "test_utils.h" #include "stream/metadata_stream.h" #include "serdes/serdes.h" @@ -225,7 +225,7 @@ main (int argc, char *argv[]) { nixl_notifs_t notifs; size_t buf_size = SIZE; uint32_t buf_num = TRANSFER_NUM_BUFFER; - uintptr_t data_address_ptr; + uintptr_t data_address_ptr = 0; /** Argument Parsing */ if (argc < 5) { @@ -300,10 +300,10 @@ main (int argc, char *argv[]) { /** Register memory in both initiator and target */ ret = agent.registerMem (local_vram_rdlist, &extra_params); - assert (ret == NIXL_SUCCESS); + nixl_exit_on_failure(ret, "Failed to register memory", role); local_vram = local_vram_rdlist.trim(); ret = agent.getLocalMD(metadata); - assert(ret == NIXL_SUCCESS); + nixl_exit_on_failure(ret, "Failed to get local MD", role); std::cout << " Start Control Path metadata exchanges \n"; if (role == target) { @@ -316,11 +316,16 @@ main (int argc, char *argv[]) { std::cout << " Received checkRemoteMD from " << initiator << std::endl; data_address_ptr = (uintptr_t)data_address; - assert (serdes->addBuf ("BaseAddress", &data_address_ptr, sizeof (uintptr_t)) == - NIXL_SUCCESS); - assert (serdes->addBuf ("BufferSize", &buf_size, sizeof (size_t)) == NIXL_SUCCESS); - assert (serdes->addBuf ("BufferTransfer", &buf_num, sizeof (uint32_t)) == NIXL_SUCCESS); - assert (serdes->addStr ("AgentMD", metadata) == NIXL_SUCCESS); + nixl_exit_on_failure(serdes->addBuf("BaseAddress", &data_address_ptr, sizeof(uintptr_t)), + "Failed to add BaseAddress", + role); + nixl_exit_on_failure(serdes->addBuf("BufferSize", &buf_size, sizeof(size_t)), + "Failed to add BufferSize", + role); + nixl_exit_on_failure(serdes->addBuf("BufferTransfer", &buf_num, sizeof(uint32_t)), + "Failed to add BufferTransfer", + role); + nixl_exit_on_failure(serdes->addStr("AgentMD", metadata), "Failed to add AgentMD", role); 
std::string message = serdes->exportStr(); while (agent.genNotif (initiator, message, &extra_params) != NIXL_SUCCESS) ; @@ -414,9 +419,9 @@ main (int argc, char *argv[]) { md_extra_params.port = peer_port; ret = agent.fetchRemoteMD (target, &md_extra_params); - assert (ret == NIXL_SUCCESS); + nixl_exit_on_failure(ret, "Failed to fetch remote MD", role); ret = agent.sendLocalMD (&md_extra_params); - assert (ret == NIXL_SUCCESS); + nixl_exit_on_failure(ret, "Failed to send local MD", role); // Not used nixl_xfer_dlist_t descs (DRAM_SEG); std::cout << initiator << " waiting checkRemoteMD from " << target << std::endl; @@ -430,14 +435,19 @@ main (int argc, char *argv[]) { for (const auto ¬if : notifs[target]) { remote_serdes->importStr (notif); - assert (remote_serdes->getBuf ("BaseAddress", &data_address_ptr, sizeof (uintptr_t)) == - NIXL_SUCCESS); - assert (remote_serdes->getBuf ("BufferSize", &buf_size, sizeof (size_t)) == - NIXL_SUCCESS); - assert (remote_serdes->getBuf ("BufferTransfer", &buf_num, sizeof (uint32_t)) == - NIXL_SUCCESS); + nixl_exit_on_failure( + remote_serdes->getBuf("BaseAddress", &data_address_ptr, sizeof(uintptr_t)), + "Failed to get BaseAddress", + role); + nixl_exit_on_failure(remote_serdes->getBuf("BufferSize", &buf_size, sizeof(size_t)), + "Failed to get BufferSize", + role); + nixl_exit_on_failure( + remote_serdes->getBuf("BufferTransfer", &buf_num, sizeof(uint32_t)), + "Failed to get BufferTransfer", + role); remote_metadata = remote_serdes->getStr ("AgentMD"); - assert (remote_metadata != ""); + nixl_exit_on_failure((remote_metadata != ""), "Failed to get AgentMD", role); agent.loadRemoteMD (remote_metadata, target); } notifs.clear(); @@ -497,14 +507,15 @@ main (int argc, char *argv[]) { std::cout << "Post the request with GPUNETIO backend transfer 1" << std::endl; PUSH_RANGE ("postXferReq", 3) status = agent.postXferReq (treq); - assert (status == NIXL_IN_PROG); + nixl_exit_on_failure((status < 0), "Failed to post Xfer Req", role); + 
POP_RANGE std::cout << "Waiting for completion to re-use buffers\n"; PUSH_RANGE ("getXferStatus", 4) while (status != NIXL_SUCCESS) { status = agent.getXferStatus (treq); - assert (status == NIXL_SUCCESS || status == NIXL_IN_PROG); + nixl_exit_on_failure(status < 0, "Failed to get Xfer Status", role); } POP_RANGE // No need for cudaStreamSyncronize as CUDA kernel and Xfer are on the same stream @@ -533,14 +544,16 @@ main (int argc, char *argv[]) { std::cout << "Post the request with GPUNETIO backend transfer 2" << std::endl; PUSH_RANGE ("postXferReq", 3) status = agent.postXferReq (treq); - assert (status >= NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to post Xfer Req", role); POP_RANGE std::cout << "Waiting for completion\n"; PUSH_RANGE ("getXferStatus", 4) while (status != NIXL_SUCCESS) { status = agent.getXferStatus (treq); - assert (status >= NIXL_SUCCESS); + nixl_exit_on_failure(!(status == NIXL_SUCCESS || status == NIXL_IN_PROG), + "Failed to get Xfer Status", + role); } POP_RANGE } else { @@ -553,14 +566,16 @@ main (int argc, char *argv[]) { std::cout << "Post the request with GPUNETIO backend transfer 1" << std::endl; PUSH_RANGE ("postXferReq", 3) status = agent.postXferReq (treq); - assert (status >= NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to post Xfer Req", role); POP_RANGE std::cout << "Waiting for completion\n"; PUSH_RANGE ("getXferStatus", 4) while (status != NIXL_SUCCESS) { status = agent.getXferStatus (treq); - assert (status >= NIXL_SUCCESS); + nixl_exit_on_failure(!(status == NIXL_SUCCESS || status == NIXL_IN_PROG), + "Failed to get Xfer Status", + role); } POP_RANGE @@ -589,14 +604,14 @@ main (int argc, char *argv[]) { std::cout << "Post the request with GPUNETIO backend transfer 2" << std::endl; PUSH_RANGE ("postXferReq", 3) status = agent.postXferReq (treq); - assert (status >= NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to post Xfer Req", role); POP_RANGE std::cout << "Waiting for completion\n"; PUSH_RANGE 
("getXferStatus", 4) while (status != NIXL_SUCCESS) { status = agent.getXferStatus (treq); - assert (status >= NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to get Xfer Status", role); } POP_RANGE } diff --git a/test/unit/plugins/ucx/meson.build b/test/unit/plugins/ucx/meson.build index 27a0f6e5d..a24ea5e8d 100644 --- a/test/unit/plugins/ucx/meson.build +++ b/test/unit/plugins/ucx/meson.build @@ -25,14 +25,14 @@ endif ucx_backend_test = executable('ucx_backend_test', 'ucx_backend_test.cpp', - dependencies: [nixl_dep, nixl_infra, nixl_common_deps, ucx_backend_dep, ucx_dep, thread_dep] + cuda_dependencies, + dependencies: [nixl_dep, nixl_infra, nixl_common_deps, ucx_backend_dep, ucx_dep, thread_dep] + cuda_dependencies + nixl_test_utils_dep, include_directories: [nixl_inc_dirs, utils_inc_dirs, '../../../../src/plugins/ucx'], cpp_args : cpp_args, install: true) ucx_backend_multi = executable('ucx_backend_multi', 'ucx_backend_multi.cpp', - dependencies: [nixl_dep, nixl_infra, nixl_common_deps, ucx_backend_dep, ucx_dep, thread_dep] + cuda_dependencies, + dependencies: [nixl_dep, nixl_infra, nixl_common_deps, ucx_backend_dep, ucx_dep, thread_dep] + cuda_dependencies + nixl_test_utils_dep, include_directories: [nixl_inc_dirs, utils_inc_dirs, '../../../../src/plugins/ucx'], cpp_args : cpp_args, install: true) diff --git a/test/unit/plugins/ucx/ucx_backend_multi.cpp b/test/unit/plugins/ucx/ucx_backend_multi.cpp index cf3cdab0f..08d59cdff 100644 --- a/test/unit/plugins/ucx/ucx_backend_multi.cpp +++ b/test/unit/plugins/ucx/ucx_backend_multi.cpp @@ -15,10 +15,11 @@ * limitations under the License. 
*/ #include -#include #include #include "ucx_backend.h" +#include "test_utils.h" + // Temporarily while fixing CI/CD pipeline #define USE_PTHREAD false @@ -60,13 +61,13 @@ void test_thread(int id) while(!ready[!id]); ret = ucx->loadRemoteConnInfo(other, conn_info[!id]); - assert(ret == NIXL_SUCCESS); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to load remote conn info", my_name); //one-sided connect if(!id) ret = ucx->connect(other); - assert(ret == NIXL_SUCCESS); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to connect", my_name); done[id] = true; while(!done[!id]) diff --git a/test/unit/plugins/ucx/ucx_backend_test.cpp b/test/unit/plugins/ucx/ucx_backend_test.cpp index 6f893b800..30b207c0b 100644 --- a/test/unit/plugins/ucx/ucx_backend_test.cpp +++ b/test/unit/plugins/ucx/ucx_backend_test.cpp @@ -17,12 +17,13 @@ #include #include #include -#include #include "ucx_backend.h" +#include "test_utils.h" using namespace std; + #ifdef HAVE_CUDA #include @@ -63,7 +64,7 @@ class testHndlIterator { ~testHndlIterator() { /* Make sure that handler was released */ - assert(!set); + nixl_exit_on_failure(!set, "Handler was not released"); } bool needPrep() { @@ -87,7 +88,7 @@ class testHndlIterator { void setHandle(nixlBackendReqH *_handle) { - assert(!set); + nixl_exit_on_failure(!set, "Handler was not released"); handle = _handle; set = true; if (reuse) { @@ -96,12 +97,12 @@ class testHndlIterator { } void unsetHandle() { - assert(set); + nixl_exit_on_failure(set, "Handler was not set"); set = false; } nixlBackendReqH *&getHandle() { - assert(set); + nixl_exit_on_failure(set, "Handler was not set"); return handle; } }; @@ -118,11 +119,7 @@ createEngine(std::string name, bool p_thread) { init.type = "UCX"; auto ucx = nixlUcxEngine::create(init).release(); - assert(!ucx->getInitErr()); - if (ucx->getInitErr()) { - std::cout << "Failed to initialize worker1" << std::endl; - exit(1); - } + nixl_exit_on_failure(!ucx->getInitErr(), "Failed to initialize worker1"); 
return ucx; } @@ -144,9 +141,9 @@ std::string memType2Str(nixl_mem_t mem_type) case FILE_SEG: return std::string("FILE"); default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } + return std::string(""); } @@ -203,10 +200,9 @@ void allocateBuffer(nixl_mem_t mem_type, int dev_id, size_t len, void* &addr) } #endif default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } - assert(addr); + nixl_exit_on_failure((addr != nullptr), "Failed to allocate buffer"); } void releaseBuffer(nixl_mem_t mem_type, int dev_id, void* &addr) @@ -222,8 +218,7 @@ void releaseBuffer(nixl_mem_t mem_type, int dev_id, void* &addr) break; #endif default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } } @@ -240,8 +235,7 @@ void doMemset(nixl_mem_t mem_type, int dev_id, void *addr, char byte, size_t len break; #endif default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } } @@ -259,9 +253,9 @@ void *getValidationPtr(nixl_mem_t mem_type, void *addr, size_t len) } #endif default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } + return nullptr; } void *releaseValidationPtr(nixl_mem_t mem_type, void *addr) @@ -275,15 +269,14 @@ void *releaseValidationPtr(nixl_mem_t mem_type, void *addr) break; #endif default: - std::cout << "Unsupported memory type!" 
<< std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } - return NULL; + return nullptr; } void allocateWrongGPUTest(nixlUcxEngine *ucx, int dev_id) { - nixlBlobDesc desc; + nixlBlobDesc desc = {0}; nixlBackendMD* md; void* buf; @@ -294,7 +287,7 @@ allocateWrongGPUTest(nixlUcxEngine *ucx, int dev_id) { int ret = ucx->registerMem(desc, VRAM_SEG, md); - assert(ret == NIXL_ERR_NOT_SUPPORTED); + nixl_exit_on_failure((ret == NIXL_ERR_NOT_SUPPORTED), "Failed to register memory", "test"); releaseBuffer(VRAM_SEG, dev_id, buf); } @@ -316,7 +309,7 @@ allocateAndRegister(nixlUcxEngine *ucx, int ret = ucx->registerMem(desc, mem_type, md); - assert(ret == NIXL_SUCCESS); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to allocate and register memory"); } void @@ -344,11 +337,11 @@ loadRemote(nixlUcxEngine *ucx, info.devId = dev_id; ucx->getPublicData(lmd, info.metaInfo); - assert(info.metaInfo.size() > 0); + nixl_exit_on_failure((info.metaInfo.size() > 0), "Failed to get public data"); // We get the data from the cetnral location and populate the backend, and receive remote_meta - int ret = ucx->loadRemoteMD (info, mem_type, agent, rmd); - assert(NIXL_SUCCESS == ret); + int ret = ucx->loadRemoteMD(info, mem_type, agent, rmd); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to load remote MD"); } void populateDescs(nixl_meta_dlist_t &descs, int dev_id, void *addr, int desc_cnt, size_t desc_size, nixlBackendMD* &md) @@ -410,14 +403,14 @@ performTransfer(nixlUcxEngine *ucx1, // Also maybe we would remove the WRITE and let the backend class decide the op if (hiter.needPrep()) { nixlBackendReqH *new_handle = nullptr; - ret3 = ucx1->prepXfer(op, req_src_descs, req_dst_descs, remote_agent, new_handle, &opt_args); - assert(ret3 == NIXL_SUCCESS); + ret3 = + ucx1->prepXfer(op, req_src_descs, req_dst_descs, remote_agent, new_handle, &opt_args); + nixl_exit_on_failure(ret3, "Failed to prep xfer"); hiter.setHandle(new_handle); } nixlBackendReqH 
*&handle = hiter.getHandle(); ret3 = ucx1->postXfer(op, req_src_descs, req_dst_descs, remote_agent, handle, &opt_args); - assert( ret3 == NIXL_SUCCESS || ret3 == NIXL_IN_PROG); - + nixl_exit_on_failure(ret3 >= NIXL_SUCCESS, "Failed to post xfer"); if (ret3 == NIXL_SUCCESS) { cout << "\t\tWARNING: Tansfer request completed immediately - no testing non-inline path" << endl; @@ -429,7 +422,7 @@ performTransfer(nixlUcxEngine *ucx1, if(progress){ ucx2->progress(); } - assert( ret3 == NIXL_SUCCESS || ret3 == NIXL_IN_PROG); + nixl_exit_on_failure(ret3 >= NIXL_SUCCESS, "Failed to check xfer"); } } @@ -451,13 +444,15 @@ performTransfer(nixlUcxEngine *ucx1, if(progress){ ucx1->progress(); } - assert(ret3 == NIXL_SUCCESS); + nixl_exit_on_failure(ret3, "Failed to get notifs"); } - assert(ret2 == 1); + nixl_exit_on_failure((ret2 == 1), "Incorrect number of target notifs"); - assert(target_notifs.front().first == "Agent1"); - assert(target_notifs.front().second == test_str); + nixl_exit_on_failure((target_notifs.front().first == "Agent1"), + "Incorrect front notif source"); + nixl_exit_on_failure((target_notifs.front().second == test_str), + "Incorrect front notif message"); cout << "OK" << endl; } @@ -468,8 +463,8 @@ performTransfer(nixlUcxEngine *ucx1, chkptr2 = getValidationPtr(req_dst_descs.getType(), addr2, len); // Perform correctness check. 
- for(size_t i = 0; i < len; i++){ - assert( ((uint8_t*) chkptr1)[i] == ((uint8_t*) chkptr2)[i]); + for (size_t i = 0; i < len; i++) { + nixl_exit_on_failure((((uint8_t *)chkptr1)[i] == ((uint8_t *)chkptr2)[i]), "Data mismatch"); } releaseValidationPtr(req_src_descs.getType(), chkptr1); @@ -494,14 +489,14 @@ test_intra_agent_transfer(bool p_thread, nixlUcxEngine *ucx, nixl_mem_t mem_type int iter = 10; - assert(ucx->supportsLocal()); + nixl_exit_on_failure(ucx->supportsLocal(), "Failed to get conn info"); //connection info is still a string std::string conn_info1; ret1 = ucx->getConnInfo(conn_info1); - assert(ret1 == NIXL_SUCCESS); - ret1 = ucx->loadRemoteConnInfo (agent1, conn_info1); - assert(ret1 == NIXL_SUCCESS); + nixl_exit_on_failure((ret1 == NIXL_SUCCESS), "Failed to get conn info"); + ret1 = ucx->loadRemoteConnInfo(agent1, conn_info1); + nixl_exit_on_failure((ret1 == NIXL_SUCCESS), "Failed to load remote conn info"); std::cout << "Local connection complete\n"; @@ -518,9 +513,8 @@ test_intra_agent_transfer(bool p_thread, nixlUcxEngine *ucx, nixl_mem_t mem_type //string descs unnecessary, convert meta locally nixlBackendMD* rmd2; - ret1 = ucx->loadLocalMD (lmd2, rmd2); - assert(ret1 == NIXL_SUCCESS); - + ret1 = ucx->loadLocalMD(lmd2, rmd2); + nixl_exit_on_failure((ret1 == NIXL_SUCCESS), "Failed to load local MD"); nixl_meta_dlist_t req_src_descs (mem_type); populateDescs(req_src_descs, 0, addr1, desc_cnt, desc_size, lmd1); @@ -585,13 +579,13 @@ test_inter_agent_transfer(bool p_thread, // location and ask for it for a remote node std::string conn_info1, conn_info2; ret = ucx1->getConnInfo(conn_info1); - assert(ret == NIXL_SUCCESS); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to get conn info"); ret = ucx2->getConnInfo(conn_info2); - assert(ret == NIXL_SUCCESS); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to get conn info"); // We assumed we put them to central location and now receiving it on the other process - ret = 
ucx1->loadRemoteConnInfo (agent2, conn_info2); - assert(ret == NIXL_SUCCESS); + ret = ucx1->loadRemoteConnInfo(agent2, conn_info2); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to load remote conn info"); // TODO: Causes race condition - investigate conn management implementation // ret = ucx2->loadRemoteConnInfo (agent1, conn_info1); @@ -662,13 +656,14 @@ test_inter_agent_transfer(bool p_thread, while(ret == 0){ ret2 = ucx2->getNotifs(target_notifs); ret = target_notifs.size(); - assert(ret2 == NIXL_SUCCESS); + nixl_exit_on_failure((ret2 == NIXL_SUCCESS), "Failed to get notifs"); } - assert(ret == 1); - - assert(target_notifs.front().first == "Agent1"); - assert(target_notifs.front().second == test_str); + nixl_exit_on_failure((ret == 1), "Incorrect number of target notifs"); + nixl_exit_on_failure((target_notifs.front().first == "Agent1"), + "Incorrect front notif source"); + nixl_exit_on_failure((target_notifs.front().second == test_str), + "Incorrect front notif message"); cout << "OK" << endl; } diff --git a/test/unit/plugins/ucx_mo/meson.build b/test/unit/plugins/ucx_mo/meson.build index 9d23daaad..207c4e69a 100644 --- a/test/unit/plugins/ucx_mo/meson.build +++ b/test/unit/plugins/ucx_mo/meson.build @@ -26,7 +26,7 @@ endif ucx_backend_test = executable('ucx_mo_backend_test', 'ucx_mo_backend_test.cpp', - dependencies: [nixl_dep, nixl_infra, nixl_common_deps, ucx_backend_dep, ucx_mo_backend_dep, ucx_dep] + cuda_dependencies, + dependencies: [nixl_dep, nixl_infra, nixl_common_deps, ucx_backend_dep, ucx_mo_backend_dep, ucx_dep] + cuda_dependencies + nixl_test_utils_dep, include_directories: [nixl_inc_dirs, utils_inc_dirs], cpp_args : cpp_args, install: true) diff --git a/test/unit/plugins/ucx_mo/ucx_mo_backend_test.cpp b/test/unit/plugins/ucx_mo/ucx_mo_backend_test.cpp index fd23ba7ec..d2098251d 100644 --- a/test/unit/plugins/ucx_mo/ucx_mo_backend_test.cpp +++ b/test/unit/plugins/ucx_mo/ucx_mo_backend_test.cpp @@ -17,12 +17,13 @@ #include #include 
#include -#include #include "ucx_mo_backend.h" +#include "test_utils.h" using namespace std; + #ifdef HAVE_CUDA #include @@ -32,9 +33,9 @@ int gpu_id = 0; static void checkCudaError(cudaError_t result, const char *message) { if (result != cudaSuccess) { - std::cerr << message << " (Error code: " << result << " - " - << cudaGetErrorString(result) << ")" << std::endl; - exit(EXIT_FAILURE); + nixl_exit_on_failure(result, + std::string(message) + " (Error code: " + std::to_string(result) + + " - " + cudaGetErrorString(result) + ")"); } } #endif @@ -66,9 +67,9 @@ std::string memType2Str(nixl_mem_t mem_type) case FILE_SEG: return std::string("FILE"); default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } + return std::string(""); } nixlBackendEngine *createEngine(std::string name, uint32_t ndev, bool p_thread) @@ -84,8 +85,8 @@ nixlBackendEngine *createEngine(std::string name, uint32_t ndev, bool p_thread) init.customParams = &custom_params; init.type = "UCX_MO"; - ucx_mo = (nixlBackendEngine*) new nixlUcxMoEngine (&init); - assert(!ucx_mo->getInitErr()); + ucx_mo = (nixlBackendEngine *)new nixlUcxMoEngine(&init); + nixl_exit_on_failure(!ucx_mo->getInitErr(), "Failed to initialize worker1"); if (ucx_mo->getInitErr()) { std::cout << "Failed to initialize worker1" << std::endl; exit(1); @@ -133,13 +134,15 @@ static int cudaQueryAddr(void *address, bool &is_dev, void allocateBuffer(nixl_mem_t mem_type, int dev_id, size_t len, void* &addr) { + int ret; switch(mem_type) { case DRAM_SEG: //addr = calloc(1, len); - posix_memalign(&addr, 4096, len); + ret = posix_memalign(&addr, 4096, len); + nixl_exit_on_failure((ret == 0), "Failed to allocate mem aligned buffer"); break; #ifdef HAVE_CUDA - case VRAM_SEG:{ + case VRAM_SEG: { bool is_dev; CUdevice dev; CUcontext ctx; @@ -153,10 +156,9 @@ void allocateBuffer(nixl_mem_t mem_type, int dev_id, size_t len, void* &addr) } #endif default: - std::cout << 
"Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } - assert(addr); + nixl_exit_on_failure(addr != nullptr, "Failed to allocate buffer"); } void releaseBuffer(nixl_mem_t mem_type, int dev_id, void* &addr) @@ -172,8 +174,7 @@ void releaseBuffer(nixl_mem_t mem_type, int dev_id, void* &addr) break; #endif default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } } @@ -190,8 +191,7 @@ void doMemset(nixl_mem_t mem_type, int dev_id, void *addr, char byte, size_t len break; #endif default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(1, "Unsupported memory type!"); } } @@ -209,9 +209,9 @@ void *getValidationPtr(nixl_mem_t mem_type, void *addr, size_t len) } #endif default: - std::cout << "Unsupported memory type!" << std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } + return nullptr; } void *releaseValidationPtr(nixl_mem_t mem_type, void *addr) @@ -225,10 +225,9 @@ void *releaseValidationPtr(nixl_mem_t mem_type, void *addr) break; #endif default: - std::cout << "Unsupported memory type!" 
<< std::endl; - assert(0); + nixl_exit_on_failure(false, "Unsupported memory type!"); } - return NULL; + return nullptr; } typedef int dev_distr_t(int idx, int max_idx, int cnt); @@ -269,7 +268,7 @@ void createLocalDescs(nixlBackendEngine *ucx, nixl_meta_dlist_t &descs, *((nixlBasicDesc*)&desc_s) = desc; *((nixlBasicDesc*)&desc_m) = desc; int ret = ucx->registerMem(desc_s, descs.getType(), desc_m.metadataP); - assert(ret == NIXL_SUCCESS); + nixl_exit_on_failure((ret == NIXL_SUCCESS), "Failed to register ucx memory"); descs.addDesc(desc_m); } } @@ -309,11 +308,11 @@ void createRemoteDescs(nixlBackendEngine *src_ucx, status = dst_ucx->loadLocalMD(src_descs[i].metadataP, desc_m.metadataP); } else { status = src_ucx->getPublicData(src_descs[i].metadataP, desc_s.metaInfo); - assert(NIXL_SUCCESS == status); + nixl_exit_on_failure(status, "Failed to get src_ucx public data"); status = dst_ucx->loadRemoteMD (desc_s, src_descs.getType(), agent, desc_m.metadataP); } - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to load dst_ucx remote MD"); dst_descs.addDesc(desc_m); } } @@ -323,8 +322,8 @@ void destroyRemoteDescs(nixlBackendEngine *dst_ucx, { nixl_status_t status; for(int i = 0; i < dst_descs.descCount(); i++) { - status = dst_ucx->unloadMD (dst_descs[i].metadataP); - assert(status == NIXL_SUCCESS); + status = dst_ucx->unloadMD(dst_descs[i].metadataP); + nixl_exit_on_failure(status, "Failed to unload dst_ucx MD"); } while(dst_descs.descCount()) { @@ -355,9 +354,9 @@ void performTransfer(nixlBackendEngine *ucx1, nixlBackendEngine *ucx2, // or an ID that later can be used to check the status as a new method // Also maybe we would remove the WRITE and let the backend class decide the op status = ucx1->prepXfer(op, req_src_descs, req_dst_descs, remote_agent, handle, &opt_args); - assert(status == NIXL_SUCCESS); + nixl_exit_on_failure(status, "Failed to prep ucx1 xfer"); status = ucx1->postXfer(op, req_src_descs, req_dst_descs, remote_agent, handle, 
&opt_args); - assert(status == NIXL_SUCCESS || status == NIXL_IN_PROG); + nixl_exit_on_failure((status >= NIXL_SUCCESS), "Failed to post ucx1 xfer"); if (status == NIXL_SUCCESS) { @@ -370,7 +369,7 @@ void performTransfer(nixlBackendEngine *ucx1, nixlBackendEngine *ucx2, if(progress){ ((nixlUcxMoEngine *)ucx2)->progress(); } - assert( (NIXL_SUCCESS == status) || (NIXL_IN_PROG == status) ); + nixl_exit_on_failure((status >= NIXL_SUCCESS), "Failed to check ucx1 xfer"); } ucx1->releaseReqH(handle); } @@ -383,33 +382,37 @@ void performTransfer(nixlBackendEngine *ucx1, nixlBackendEngine *ucx2, while(!target_notifs.size()){ status = ucx2->getNotifs(target_notifs); - assert(NIXL_SUCCESS == status); + nixl_exit_on_failure(status, "Failed to get ucx2 notifs"); if(progress){ ((nixlUcxMoEngine *)ucx1)->progress(); } } - assert(target_notifs.size() == 1); - assert(target_notifs.front().first == "Agent1"); - assert(target_notifs.front().second == test_str); + nixl_exit_on_failure((target_notifs.size() == 1), "Incorrect number of target notifs"); + nixl_exit_on_failure((target_notifs.front().first == "Agent1"), + "Incorrect front notif source"); + nixl_exit_on_failure((target_notifs.front().second == test_str), + "Incorrect front notif message"); cout << "OK" << endl; } cout << "\t\tData verification: " << flush; - assert(req_src_descs.descCount() == req_dst_descs.descCount()); + nixl_exit_on_failure((req_src_descs.descCount() == req_dst_descs.descCount()), + "Data length mismatch"); for(int i = 0; i < req_src_descs.descCount(); i++) { auto sdesc = req_src_descs[i]; auto ddesc = req_dst_descs[i]; - assert(sdesc.len == ddesc.len); + nixl_exit_on_failure((sdesc.len == ddesc.len), "Data length mismatch"); size_t len = ddesc.len; chkptr1 = getValidationPtr(req_src_descs.getType(), (void*)sdesc.addr, len); chkptr2 = getValidationPtr(req_dst_descs.getType(), (void*)ddesc.addr, len); // Perform correctness check. 
- for(size_t i = 0; i < len; i++){ - assert( ((uint8_t*) chkptr1)[i] == ((uint8_t*) chkptr2)[i]); + for (size_t i = 0; i < len; i++) { + nixl_exit_on_failure((((uint8_t *)chkptr1)[i] == ((uint8_t *)chkptr2)[i]), + "Data mismatch"); } releaseValidationPtr(req_src_descs.getType(), chkptr1); @@ -450,19 +453,16 @@ void test_agent_transfer(bool p_thread, // location and ask for it for a remote node std::string conn_info1; status = ucx1->getConnInfo(conn_info1); - assert(NIXL_SUCCESS == status); - + nixl_exit_on_failure(status, "Failed to get ucx1 conn info"); std::string conn_info2; status = ucx2->getConnInfo(conn_info2); - assert(NIXL_SUCCESS == status); - + nixl_exit_on_failure(status, "Failed to get ucx2 conn info"); // We assumed we put them to central location and now receiving it on the other process if (is_local) { agent = &agent1; } - status = ucx1->loadRemoteConnInfo (*agent, conn_info2); - assert(NIXL_SUCCESS == status); - + status = ucx1->loadRemoteConnInfo(*agent, conn_info2); + nixl_exit_on_failure(status, "Failed to load ucx1 remote conn info"); // TODO: Causes race condition - investigate conn management implementation // ret = ucx2->loadRemoteConnInfo (agent1, conn_info1); @@ -523,16 +523,18 @@ void test_agent_transfer(bool p_thread, while(target_notifs.size() == 0){ status = ucx2->getNotifs(target_notifs); - assert(NIXL_SUCCESS == status); + nixl_exit_on_failure(status, "Failed to get ucx2 notifs"); if (!p_thread) { /* progress UCX1 as well */ ((nixlUcxMoEngine *)ucx1)->progress(); } } - assert(target_notifs.size() == 1); - assert(target_notifs.front().first == "Agent1"); - assert(target_notifs.front().second == test_str); + nixl_exit_on_failure((target_notifs.size() == 1), "Incorrect number of target notifs"); + nixl_exit_on_failure((target_notifs.front().first == "Agent1"), + "Incorrect front notif source"); + nixl_exit_on_failure((target_notifs.front().second == test_str), + "Incorrect front notif message"); cout << "OK" << endl; } diff --git 
a/test/unit/utils/common/map_perf.cpp b/test/unit/utils/common/map_perf.cpp index 13e731b65..964e7d00e 100644 --- a/test/unit/utils/common/map_perf.cpp +++ b/test/unit/utils/common/map_perf.cpp @@ -15,7 +15,6 @@ * limitations under the License. */ #include -#include #include #include #include @@ -24,6 +23,7 @@ #include #include "common/str_tools.h" +#include "test_utils.h" std::string generate_random_string(size_t length) { const std::string characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -118,7 +118,7 @@ void test_comparison_perf(const int n_entries, const size_t str_len) { std::cout << "custom map lookup test, total time for " << n_iters << " iters: " << diff_time.tv_sec << "s " << diff_time.tv_usec << "us \n"; - assert(sum1 == sum2); + nixl_exit_on_failure((sum1 == sum2), "Test failed", "test"); gettimeofday(&start_time, NULL); for(int i = 0; i