Skip to content

Commit f4748dc

Browse files
authored
Merge branch 'main' into destructor-race
2 parents a9cd35e + 2c5ebb2 commit f4748dc

File tree

4 files changed

+120
-18
lines changed

4 files changed

+120
-18
lines changed

benchmark/nixlbench/README.md

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ meson configure build
160160
#### Device and Network Configuration
161161
```
162162
--device_list LIST # Comma-separated device names (default: all)
163-
--etcd_endpoints URL # ETCD server URL for coordination (default: http://localhost:2379)
163+
--etcd_endpoints URL # ETCD server URL for coordination (optional for storage backends)
164164
```
165165

166166
#### Storage Backend Options (GDS, GDS_MT, POSIX, HF3FS, OBJ)
@@ -208,11 +208,25 @@ meson configure build
208208

209209
NIXL Benchmark uses an ETCD key-value store for coordination between benchmark workers. This is useful in containerized or cloud-native environments.
210210

211-
To run the benchmark:
211+
**ETCD Requirements:**
212+
- **Required**: Network backends (UCX, UCX_MO, GPUNETIO, Mooncake) and multi-node setups
213+
- **Optional**: Storage backends (GDS, GDS_MT, POSIX, HF3FS, OBJ) running as single instances
214+
- **Required**: Storage backends when `--etcd_endpoints` is explicitly specified
215+
216+
**For multi-node benchmarks:**
212217

213218
1. Ensure ETCD server is running (e.g., `docker run -p 2379:2379 quay.io/coreos/etcd`
214219
2. Launch multiple nixlbench instances pointing to the same ETCD server
215220

221+
**For single-instance storage benchmarks:**
222+
```bash
223+
# No ETCD needed - just run directly
224+
./nixlbench --backend GDS --filepath /mnt/storage/testfile
225+
226+
# Or with explicit ETCD if coordination is needed
227+
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend GDS --filepath /mnt/storage/testfile
228+
```
229+
216230
Note: etcd can be installed directly on host as well:
217231
```bash
218232
apt install etcd-server
@@ -255,26 +269,29 @@ The workers automatically coordinate ranks through ETCD as they connect.
255269

256270
**GDS (GPU Direct Storage)**
257271
```bash
258-
# Basic GDS benchmark
272+
# Basic GDS benchmark (no ETCD needed for single instance)
273+
./nixlbench --backend GDS --filepath /mnt/storage/testfile --storage_enable_direct
274+
275+
# GDS with ETCD coordination
259276
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend GDS --filepath /mnt/storage/testfile --storage_enable_direct
260277

261278
# GDS with custom batch settings
262-
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend GDS --filepath /mnt/storage/testfile --gds_batch_pool_size 64 --gds_batch_limit 256
279+
./nixlbench --backend GDS --filepath /mnt/storage/testfile --gds_batch_pool_size 64 --gds_batch_limit 256
263280
```
264281

265282
**GDS_MT (Multi-threaded GDS)**
266283
```bash
267-
# Multi-threaded GDS
268-
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend GDS_MT --filepath /mnt/storage/testfile --gds_mt_num_threads 8
284+
# Multi-threaded GDS (no ETCD needed for single instance)
285+
./nixlbench --backend GDS_MT --filepath /mnt/storage/testfile --gds_mt_num_threads 8
269286
```
270287

271288
**POSIX Backend**
272289
```bash
273-
# POSIX with AIO
274-
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend POSIX --filepath /mnt/storage/testfile --posix_api_type AIO
290+
# POSIX with AIO (no ETCD needed for single instance)
291+
./nixlbench --backend POSIX --filepath /mnt/storage/testfile --posix_api_type AIO
275292

276293
# POSIX with io_uring
277-
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend POSIX --filepath /mnt/storage/testfile --posix_api_type URING --storage_enable_direct
294+
./nixlbench --backend POSIX --filepath /mnt/storage/testfile --posix_api_type URING --storage_enable_direct
278295
```
279296

280297
#### Worker Types
@@ -287,27 +304,30 @@ The workers automatically coordinate ranks through ETCD as they connect.
287304

288305
### Benchmarking the OBJ (S3) Backend
289306

290-
For OBJ plugin benchmarking run etcd-server and a single nixlbench instance.
307+
For OBJ plugin benchmarking, ETCD is optional for single instances.
291308

292309
Example:
293310
```bash
294-
# Basic S3 benchmark using environment variables
311+
# Basic S3 benchmark using environment variables (no ETCD needed)
295312
AWS_ACCESS_KEY_ID=<access_key> AWS_SECRET_ACCESS_KEY=<secret_key> AWS_DEFAULT_REGION=<region> \
296-
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend OBJ --obj_bucket_name <bucket_name>
313+
./nixlbench --backend OBJ --obj_bucket_name <bucket_name>
297314

298-
# S3 benchmark using command line flags
299-
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend OBJ \
315+
# S3 benchmark using command line flags (no ETCD needed)
316+
./nixlbench --backend OBJ \
300317
--obj_access_key <access_key> \
301318
--obj_secret_key <secret_key> \
302319
--obj_region <region> \
303320
--obj_bucket_name <bucket_name>
321+
322+
# S3 benchmark with ETCD coordination (if needed)
323+
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend OBJ --obj_bucket_name <bucket_name>
304324
```
305325

306326
**Performance Considerations:**
307327
Transfer times are higher than local storage, so consider reducing iterations:
308328

309329
```bash
310-
./nixlbench --etcd_endpoints http://etcd-server:2379 --backend OBJ \
330+
./nixlbench --backend OBJ \
311331
--obj_bucket_name test-bucket \
312332
--warmup_iter 32 --num_iter 32 --large_blk_iter_ftr 2
313333
```

benchmark/nixlbench/src/utils/utils.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ DEFINE_int32(gds_mt_num_threads, 1, "Number of threads used by GDS MT plugin (De
9797
// For example- 0:mlx5_0,mlx5_1,mlx5_2,1:mlx5_3,mlx5_4, ...
9898
DEFINE_string(device_list, "all", "Comma-separated device name to use for \
9999
communication (only used with nixl worker)");
100-
DEFINE_string(etcd_endpoints, "http://localhost:2379", "ETCD server endpoints for communication");
100+
DEFINE_string(etcd_endpoints,
101+
"",
102+
"ETCD server endpoints for communication (optional for storage backends)");
101103

102104
// POSIX options - only used when backend is POSIX
103105
DEFINE_string (posix_api_type,
@@ -279,6 +281,14 @@ xferBenchConfig::loadFromFlags() {
279281
posix_api_type = FLAGS_posix_api_type;
280282
storage_enable_direct = FLAGS_storage_enable_direct;
281283

284+
// Validate ETCD configuration
285+
if (!isStorageBackend() && etcd_endpoints.empty()) {
286+
// For non-storage backends, set default ETCD endpoint
287+
etcd_endpoints = "http://localhost:2379";
288+
std::cout << "Using default ETCD endpoint for non-storage backend: " << etcd_endpoints
289+
<< std::endl;
290+
}
291+
282292
if (worker_type == XFERBENCH_WORKER_NVSHMEM) {
283293
if (!((XFERBENCH_SEG_TYPE_VRAM == initiator_seg_type) &&
284294
(XFERBENCH_SEG_TYPE_VRAM == target_seg_type) &&
@@ -373,7 +383,11 @@ xferBenchConfig::printConfig() {
373383
printSeparator('*');
374384
printOption("Runtime (--runtime_type=[etcd])", runtime_type);
375385
if (runtime_type == XFERBENCH_RT_ETCD) {
376-
printOption("ETCD Endpoint ", etcd_endpoints);
386+
if (etcd_endpoints.empty()) {
387+
printOption("ETCD Endpoint ", "disabled (storage backend)");
388+
} else {
389+
printOption("ETCD Endpoint ", etcd_endpoints);
390+
}
377391
}
378392
printOption("Worker type (--worker_type=[nixl,nvshmem])", worker_type);
379393
if (worker_type == XFERBENCH_WORKER_NIXL) {

benchmark/nixlbench/src/worker/nixl/nixl_worker.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,7 @@ int
748748
xferBenchNixlWorker::exchangeMetadata() {
749749
int meta_sz, ret = 0;
750750

751+
// Skip metadata exchange for storage backends or when ETCD is not available
751752
if (xferBenchConfig::isStorageBackend()) {
752753
return 0;
753754
}
@@ -1054,6 +1055,12 @@ xferBenchNixlWorker::poll(size_t block_size) {
10541055

10551056
int
10561057
xferBenchNixlWorker::synchronizeStart() {
1058+
// For storage backends without ETCD, no synchronization needed
1059+
if (xferBenchConfig::isStorageBackend() && xferBenchConfig::etcd_endpoints.empty()) {
1060+
std::cout << "Single instance storage backend - no synchronization needed" << std::endl;
1061+
return 0;
1062+
}
1063+
10571064
if (IS_PAIRWISE_AND_SG()) {
10581065
std::cout << "Waiting for all processes to start... (expecting " << rt->getSize()
10591066
<< " total: " << xferBenchConfig::num_initiator_dev << " initiators and "

benchmark/nixlbench/src/worker/worker.cpp

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,60 @@
2121

2222
#include <unistd.h>
2323

24+
// Null runtime for storage backends that don't need ETCD
25+
class xferBenchNullRT : public xferBenchRT {
26+
public:
27+
xferBenchNullRT() {
28+
setSize(1);
29+
setRank(0);
30+
}
31+
32+
virtual ~xferBenchNullRT() {}
33+
34+
virtual int
35+
sendInt(int *buffer, int dest_rank) override {
36+
return 0;
37+
}
38+
39+
virtual int
40+
recvInt(int *buffer, int src_rank) override {
41+
return 0;
42+
}
43+
44+
virtual int
45+
broadcastInt(int *buffer, size_t count, int root_rank) override {
46+
return 0;
47+
}
48+
49+
virtual int
50+
sendChar(char *buffer, size_t count, int dest_rank) override {
51+
return 0;
52+
}
53+
54+
virtual int
55+
recvChar(char *buffer, size_t count, int src_rank) override {
56+
return 0;
57+
}
58+
59+
virtual int
60+
reduceSumDouble(double *local_value, double *global_value, int dest_rank) override {
61+
*global_value = *local_value;
62+
return 0;
63+
}
64+
65+
virtual int
66+
barrier(const std::string &barrier_id) override {
67+
return 0;
68+
}
69+
};
70+
2471
static xferBenchRT *createRT(int *terminate) {
72+
// For storage backends without ETCD endpoints, use null runtime
73+
if (xferBenchConfig::isStorageBackend() && xferBenchConfig::etcd_endpoints.empty()) {
74+
std::cout << "Using null runtime for storage backend without ETCD" << std::endl;
75+
return new xferBenchNullRT();
76+
}
77+
2578
if (XFERBENCH_RT_ETCD == xferBenchConfig::runtime_type) {
2679
int total = 2;
2780
if (XFERBENCH_MODE_SG == xferBenchConfig::mode) {
@@ -46,6 +99,11 @@ static xferBenchRT *createRT(int *terminate) {
4699
}
47100

48101
int xferBenchWorker::synchronize() {
102+
// For storage backends without ETCD, no synchronization needed
103+
if (xferBenchConfig::isStorageBackend() && xferBenchConfig::etcd_endpoints.empty()) {
104+
return 0;
105+
}
106+
49107
if (rt->barrier("sync") != 0) {
50108
std::cerr << "Failed to synchronize" << std::endl;
51109
// assuming this is a fatal error, continue benchmarking after synchronization failure does
@@ -67,7 +125,10 @@ xferBenchWorker::xferBenchWorker(int *argc, char ***argv) {
67125

68126
int rank = rt->getRank();
69127

70-
if (XFERBENCH_MODE_SG == xferBenchConfig::mode) {
128+
// For storage backends without ETCD, always act as initiator
129+
if (xferBenchConfig::isStorageBackend() && xferBenchConfig::etcd_endpoints.empty()) {
130+
name = "initiator";
131+
} else if (XFERBENCH_MODE_SG == xferBenchConfig::mode) {
71132
if (rank >= 0 && rank < xferBenchConfig::num_initiator_dev) {
72133
name = "initiator";
73134
} else {

0 commit comments

Comments
 (0)