Skip to content

Commit 640f65b

Browse files
committed
Hotfix: Add workload latency stats for dally-dfly
1 parent 3ee988e commit 640f65b

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

src/network-workloads/archived/model-net-synthetic-dally-dfly.c

+61
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ static char group_name[MAX_NAME_LENGTH];
4646
static char lp_type_name[MAX_NAME_LENGTH];
4747
static int group_index, lp_type_index, rep_id, offset;
4848

49+
/* statistic values for final output */
/* Process-local accumulators: svr_finalize folds each server LP's counters
 * into these, and svr_report_stats reduces them across MPI ranks. */
50+
static tw_stime max_global_server_latency = 0.0; /* largest per-server max_server_latency seen by svr_finalize */
51+
static tw_stime sum_global_server_latency = 0.0; /* sum of every server's sum_server_latency */
52+
static long long sum_global_messages_received = 0; /* sum of every server's msg_recvd_count */
53+
static tw_stime mean_global_server_latency = 0.0; /* NOTE(review): not referenced in the visible hunks of this change -- confirm whether it is still needed */
54+
4955
/* type of events */
5056
enum svr_event
5157
{
@@ -74,12 +80,17 @@ struct svr_state
7480
tw_stime end_ts; /* time that we ended sending requests */
7581
int svr_id;
7682
int dest_id;
83+
84+
tw_stime max_server_latency; /* maximum measured packet latency observed by server */
85+
tw_stime sum_server_latency; /* running sum of measured latencies observed by server for calc of mean */
7786
};
7887

7988
/* Event payload exchanged between server LPs. The kickoff handler fills a
 * local copy and memcpy()s it into the remote copy, so every field set
 * before that copy travels with both events. */
struct svr_msg
8089
{
8190
enum svr_event svr_event_type; /* kickoff handler sets LOCAL on the local copy and REMOTE on the remote copy */
8291
tw_lpid src; /* source of this request or ack */
92+
tw_stime msg_start_time; /* tw_now() of the sender when the message was created; the remote handler subtracts it from its own tw_now() to get packet latency */
93+
tw_stime saved_time; /* helper for reverse computation: previous max_server_latency, restored by the remote reverse handler when b->c2 is set */
8394
int incremented_flag; /* helper for reverse computation */
8495
model_net_event_return event_rc; /* NOTE(review): presumably the model_net_event() return kept for reverse computation -- not shown here, confirm */
8596
};
@@ -208,6 +219,8 @@ static void svr_init(
208219
ns->start_ts = 0.0;
209220
ns->dest_id = -1;
210221
ns->svr_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);
222+
ns->max_server_latency = 0.0;
223+
ns->sum_server_latency = 0.0;
211224

212225
issue_event(ns, lp);
213226
return;
@@ -258,6 +271,7 @@ static void handle_kickoff_event(
258271

259272
m_local->svr_event_type = LOCAL;
260273
m_local->src = lp->gid;
274+
m_local->msg_start_time = tw_now(lp);
261275

262276
memcpy(m_remote, m_local, sizeof(svr_msg));
263277
m_remote->svr_event_type = REMOTE;
@@ -337,6 +351,11 @@ static void handle_remote_rev_event(
337351
(void)m;
338352
(void)lp;
339353
ns->msg_recvd_count--;
354+
355+
tw_stime packet_latency = tw_now(lp) - m->msg_start_time;
356+
ns->sum_server_latency -= packet_latency;
357+
if (b->c2)
358+
ns->max_server_latency = m->saved_time;
340359
}
341360

342361
static void handle_remote_event(
@@ -349,6 +368,15 @@ static void handle_remote_event(
349368
(void)m;
350369
(void)lp;
351370
ns->msg_recvd_count++;
371+
372+
tw_stime packet_latency = tw_now(lp) - m->msg_start_time;
373+
ns->sum_server_latency += packet_latency;
374+
if (packet_latency > ns->max_server_latency) {
375+
b->c2 = 1;
376+
m->saved_time = ns->max_server_latency;
377+
ns->max_server_latency = packet_latency;
378+
}
379+
352380
}
353381

354382
static void handle_local_rev_event(
@@ -387,6 +415,18 @@ static void svr_finalize(
387415
{
388416
ns->end_ts = tw_now(lp);
389417

418+
//add to the global running sums
419+
sum_global_server_latency += ns->sum_server_latency;
420+
sum_global_messages_received += ns->msg_recvd_count;
421+
422+
//compare to global maximum
423+
if (ns->max_server_latency > max_global_server_latency)
424+
max_global_server_latency = ns->max_server_latency;
425+
426+
//this server's mean
427+
// tw_stime mean_packet_latency = ns->sum_server_latency/ns->msg_recvd_count;
428+
429+
390430
//printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d \n", (unsigned long long)lp->gid, PAYLOAD_SZ*ns->msg_recvd_count, ns_to_s(ns->end_ts-ns->start_ts),
391431
// ((double)(PAYLOAD_SZ*ns->msg_sent_count)/(double)(1024*1024)/ns_to_s(ns->end_ts-ns->start_ts)), ns->msg_sent_count, ns->msg_recvd_count, ns->local_recvd_count);
392432
return;
@@ -439,6 +479,26 @@ static void svr_event(
439479
}
440480
}
441481

482+
// does MPI reduces across PEs to generate stats based on the global static variables in this file
483+
static void svr_report_stats()
484+
{
485+
long long total_received_messages;
486+
tw_stime total_sum_latency, max_latency, mean_latency;
487+
488+
489+
MPI_Reduce( &sum_global_messages_received, &total_received_messages, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
490+
MPI_Reduce( &sum_global_server_latency, &total_sum_latency, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES);
491+
MPI_Reduce( &max_global_server_latency, &max_latency, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES);
492+
493+
mean_latency = total_sum_latency / total_received_messages;
494+
495+
if(!g_tw_mynode)
496+
{
497+
printf("\nSynthetic Workload LP Stats: Mean Message Latency: %lf us, Maximum Message Latency: %lf us, Total Messages Received: %lld\n",
498+
(float)mean_latency / 1000, (float)max_latency / 1000, total_received_messages);
499+
}
500+
}
501+
442502
int main(
443503
int argc,
444504
char **argv)
@@ -516,6 +576,7 @@ int main(
516576
assert(ret == 0 || !"lp_io_flush failure");
517577
}
518578
model_net_report_stats(net_id);
579+
svr_report_stats();
519580
tw_end();
520581
return 0;
521582
}

0 commit comments

Comments
 (0)