Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .gitlab/test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,6 @@ if $TEST_LIBFABRIC ; then
fi
./bin/nixl_etcd_example
./bin/ucx_backend_test
# Skip UCX_MO backend test on GPU worker, fails VRAM transfers
if ! $HAS_GPU ; then
./bin/ucx_mo_backend_test
fi
mkdir -p /tmp/telemetry_test
NIXL_TELEMETRY_ENABLE=y NIXL_TELEMETRY_DIR=/tmp/telemetry_test ./bin/agent_example &
sleep 1
Expand Down
2 changes: 1 addition & 1 deletion benchmark/kvbench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ These arguments are used by both `plan` and `profile` commands:
| -------- | ----------- |
| `--source` | Source of the nixl descriptors [file, memory, gpu] (default: file) |
| `--destination` | Destination of the nixl descriptors [file, memory, gpu] (default: memory) |
| `--backend` | Communication backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ] (default: UCX) |
| `--backend` | Communication backend [UCX, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ] (default: UCX) |
| `--worker_type` | Worker to use to transfer data [nixl, nvshmem] (default: nixl) |
| `--initiator_seg_type` | Memory segment type for initiator [DRAM, VRAM] (default: DRAM) |
| `--target_seg_type` | Memory segment type for target [DRAM, VRAM] (default: DRAM) |
Expand Down
2 changes: 1 addition & 1 deletion benchmark/kvbench/commands/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def nixl_bench_args(func):
func = click.option(
"--backend",
type=str,
help="Communication backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ] (default: UCX)",
help="Communication backend [UCX, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ] (default: UCX)",
)(func)
func = click.option(
"--worker_type",
Expand Down
4 changes: 2 additions & 2 deletions benchmark/kvbench/commands/nixlbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def _configure_posix(self, source: str, destination: str):
raise ValueError(f"Invalid source for POSIX/HF3FS: {source}")

def _configure_ucx(self, backend: str, source: str, destination: str):
"""Configure UCX, UCX_MO, GPUNETIO, and Mooncake plugins (same logic for all)"""
"""Configure UCX, GPUNETIO, and Mooncake plugins (same logic for all)"""
arg_to_seg_type = {
"memory": "DRAM",
"gpu": "VRAM",
Expand Down Expand Up @@ -241,7 +241,7 @@ def configure_segment_type(self, backend: str, source: str, destination: str):
self._configure_gds(source, destination)
elif backend_lower in ["posix", "hf3fs"]:
self._configure_posix(source, destination)
elif backend_lower in ["ucx", "ucx_mo", "gpunetio", "mooncake"]:
elif backend_lower in ["ucx", "gpunetio", "mooncake"]:
self._configure_ucx(backend_lower, source, destination)
elif backend_lower == "obj":
self._configure_obj(source, destination)
Expand Down
9 changes: 3 additions & 6 deletions benchmark/nixlbench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ A comprehensive benchmarking tool for the NVIDIA Inference Xfer Library (NIXL) t

## Features

- **Multiple Communication Backends**: UCX, UCX_MO, GPUNETIO, Mooncake, Libfabric for network communication
- **Multiple Communication Backends**: UCX, GPUNETIO, Mooncake, Libfabric for network communication
- **Storage Backend Support**: GDS, GDS_MT, POSIX, HF3FS, OBJ (S3), GUSLI for storage operations
- **Flexible Communication Patterns**:
- **Pairwise**: Point-to-point communication between pairs
Expand Down Expand Up @@ -420,7 +420,7 @@ sudo systemctl start etcd && sudo systemctl enable etcd
```
--runtime_type NAME # Type of runtime to use [ETCD] (default: ETCD)
--worker_type NAME # Worker to use to transfer data [nixl, nvshmem] (default: nixl)
--backend NAME # Communication backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, GUSLI] (default: UCX)
--backend NAME # Communication backend [UCX, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, GUSLI] (default: UCX)
--benchmark_group NAME # Name of benchmark group for parallel runs (default: default)
--etcd_endpoints URL # ETCD server URL for coordination (default: http://localhost:2379)
```
Expand Down Expand Up @@ -520,7 +520,7 @@ Note: storage_enable_direct is automatically enabled for GUSLI backend
NIXL Benchmark uses an ETCD key-value store for coordination between benchmark workers. This is useful in containerized or cloud-native environments.

**ETCD Requirements:**
- **Required**: Network backends (UCX, UCX_MO, GPUNETIO, Mooncake, Libfabric) and multi-node setups
- **Required**: Network backends (UCX, GPUNETIO, Mooncake, Libfabric) and multi-node setups
- **Optional**: Storage backends (GDS, GDS_MT, POSIX, HF3FS, OBJ, GUSLI) running as single instances
- **Required**: Storage backends when `--etcd_endpoints` is explicitly specified

Expand Down Expand Up @@ -565,9 +565,6 @@ The workers automatically coordinate ranks through ETCD as they connect.

# UCX with specific devices
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend UCX --device_list mlx5_0,mlx5_1

# UCX Memory-Only variant
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend UCX_MO
```

**GPUNETIO Backend:**
Expand Down
5 changes: 2 additions & 3 deletions benchmark/nixlbench/src/utils/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ DEFINE_string(worker_type, XFERBENCH_WORKER_NIXL, "Type of worker [nixl, nvshmem
DEFINE_string(
backend,
XFERBENCH_BACKEND_UCX,
"Name of NIXL backend [UCX, UCX_MO, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, GUSLI] \
"Name of NIXL backend [UCX, GDS, GDS_MT, POSIX, GPUNETIO, Mooncake, HF3FS, OBJ, GUSLI] \
(only used with nixl worker)");
DEFINE_string(initiator_seg_type, XFERBENCH_SEG_TYPE_DRAM, "Type of memory segment for initiator \
[DRAM, VRAM]. Note: Storage backends always use DRAM locally.");
Expand Down Expand Up @@ -429,8 +429,7 @@ xferBenchConfig::printConfig() {
}
printOption("Worker type (--worker_type=[nixl,nvshmem])", worker_type);
if (worker_type == XFERBENCH_WORKER_NIXL) {
printOption("Backend (--backend=[UCX,UCX_MO,GDS,GDS_MT,POSIX,Mooncake,HF3FS,OBJ])",
backend);
printOption("Backend (--backend=[UCX,GDS,GDS_MT,POSIX,Mooncake,HF3FS,OBJ])", backend);
printOption ("Enable pt (--enable_pt=[0,1])", std::to_string (enable_pt));
printOption("Progress threads (--progress_threads=N)", std::to_string(progress_threads));
printOption ("Device list (--device_list=dev1,dev2,...)", device_list);
Expand Down
1 change: 0 additions & 1 deletion benchmark/nixlbench/src/utils/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@

// Backend types
#define XFERBENCH_BACKEND_UCX "UCX"
#define XFERBENCH_BACKEND_UCX_MO "UCX_MO"
#define XFERBENCH_BACKEND_LIBFABRIC "LIBFABRIC"
#define XFERBENCH_BACKEND_GDS "GDS"
#define XFERBENCH_BACKEND_GDS_MT "GDS_MT"
Expand Down
10 changes: 1 addition & 9 deletions benchmark/nixlbench/src/worker/nixl/nixl_worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ xferBenchNixlWorker::xferBenchNixlWorker(int *argc, char ***argv, std::vector<st
agent->getAvailPlugins(plugins);

if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_UCX) ||
0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_UCX_MO) ||
0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_LIBFABRIC) ||
0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_GPUNETIO) ||
0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_MOONCAKE) ||
Expand All @@ -126,23 +125,16 @@ xferBenchNixlWorker::xferBenchNixlWorker(int *argc, char ***argv, std::vector<st

agent->getPluginParams(backend_name, mems, backend_params);

if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_UCX) ||
0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_UCX_MO)) {
if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_UCX)) {
backend_params["num_threads"] = std::to_string(xferBenchConfig::progress_threads);

// No need to set device_list if all is specified
// fallback to backend preference
if (devices[0] != "all" && devices.size() >= 1) {
if (isInitiator()) {
backend_params["device_list"] = devices[rank];
if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_UCX_MO)) {
backend_params["num_ucx_engines"] = xferBenchConfig::num_initiator_dev;
}
} else {
backend_params["device_list"] = devices[rank - xferBenchConfig::num_initiator_dev];
if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_UCX_MO)) {
backend_params["num_ucx_engines"] = xferBenchConfig::num_target_dev;
}
}
}

Expand Down
4 changes: 0 additions & 4 deletions src/core/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ if 'UCX' in static_plugins
nixl_lib_deps += [ ucx_backend_interface, asio_dep, cuda_dep ]
endif

if 'UCX_MO' in static_plugins
nixl_lib_deps += [ ucx_mo_backend_interface, cuda_dep ]
endif

if 'POSIX' in static_plugins
nixl_lib_deps += [ posix_backend_interface ]
endif
Expand Down
4 changes: 0 additions & 4 deletions src/core/nixl_plugin_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,10 +403,6 @@ void nixlPluginManager::registerBuiltinPlugins() {
NIXL_REGISTER_STATIC_PLUGIN(UCX)
#endif

#ifdef STATIC_PLUGIN_UCX_MO
NIXL_REGISTER_STATIC_PLUGIN(UCX_MO)
#endif

#ifdef STATIC_PLUGIN_GDS
#ifndef DISABLE_GDS_BACKEND
NIXL_REGISTER_STATIC_PLUGIN(GDS)
Expand Down
3 changes: 0 additions & 3 deletions src/plugins/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ucx_backend_inc_dirs = include_directories('./ucx')

if ucx_dep.found()
subdir('ucx')
subdir('ucx_mo')
endif

subdir('posix') # Always try to build POSIX backend, it will handle its own dependencies
Expand Down
54 changes: 0 additions & 54 deletions src/plugins/ucx_mo/meson.build

This file was deleted.

Loading