Skip to content

Commit

Permalink
Fix ACKNACK event mishandling of AANR_SILENT_NACK (#2049)
Browse files Browse the repository at this point in the history
* Fix ACKNACK event mishandling of AANR_SILENT_NACK

This case is supposed to not send anything at all, only rescheduling the event to send a
NACK when we are willing to do that.  The early out was missing, causing a (pure) ACK to
be generated and a failing assertion in the code updating the state of last ACK sent
because it was supposed to be unreachable.

In a release build it should not cause more damage than a sometimes sending ACK where
sending nothing was intended; in a debug build it would instead crash.

Signed-off-by: Erik Boasson <[email protected]>

* Consider an event about to be deleted as scheduled

The function is used only in one place, for verifying that an ACKNACK event is scheduled
if data is known to be missing.  It is also be ok if the event is pending deletion because
that is tied to the disappearance of the writer.

Signed-off-by: Erik Boasson <[email protected]>

---------

Signed-off-by: Erik Boasson <[email protected]>
  • Loading branch information
eboasson committed Jun 20, 2024
1 parent 80a4c2c commit d7db65c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 53 deletions.
4 changes: 2 additions & 2 deletions src/core/ddsi/include/dds/ddsi/ddsi_xevent.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ void ddsi_delete_xevent (struct ddsi_xevent *ev)
int ddsi_resched_xevent_if_earlier (struct ddsi_xevent *ev, ddsrt_mtime_t tsched)
ddsrt_nonnull_all;

/** @brief returns whether or not the event is scheduled
/** @brief returns whether or not the event is scheduled or pending deletion
* @component timed_events
*
* @remark: may be called from inside the event handler
*
* @param[in] ev the event for which to check whether it is scheduled
*
* @retval 0 not scheduled
* @retval 1 scheduled
* @retval 1 scheduled or pending deletion
*/
int ddsi_xevent_is_scheduled (struct ddsi_xevent *ev)
ddsrt_nonnull_all;
Expand Down
96 changes: 46 additions & 50 deletions src/core/ddsi/src/ddsi_acknack.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ static enum ddsi_add_acknack_result get_acknack_info (const struct ddsi_proxy_wr
result = (bitmap_result == DDSI_REORDER_NACKMAP_ACK) ? AANR_ACK : AANR_SUPPRESSED_NACK;
break;
}

case DDSI_REORDER_NACKMAP_NACK: {
// [seq_base:0 .. seq_end_p1:0) + [seq_end_p1:frag_base .. seq_end_p1:frag_end_p1) if frag_end_p1 > 0
const ddsi_seqno_t seq_base = ddsi_from_seqno (info->acknack.set.bitmap_base);
Expand All @@ -266,20 +266,20 @@ static enum ddsi_add_acknack_result get_acknack_info (const struct ddsi_proxy_wr
const ddsi_seqno_t seq_end_p1 = seq_base + info->acknack.set.numbits;
const uint32_t frag_base = (info->nackfrag.seq > 0) ? info->nackfrag.set.bitmap_base : 0;
const uint32_t frag_end_p1 = (info->nackfrag.seq > 0) ? info->nackfrag.set.bitmap_base + info->nackfrag.set.numbits : 0;

/* Let caller know whether it is a nack, and, in steady state, set
final to prevent a response if it isn't. The initial
(pre-emptive) acknack is different: it'd be nice to get a
heartbeat in response.
Who cares about an answer to an acknowledgment!? -- actually,
that'd a very useful feature in combination with directed
heartbeats, or somesuch, to get reliability guarantees. */
nack_summary->seq_end_p1 = seq_end_p1;
nack_summary->frag_end_p1 = frag_end_p1;
nack_summary->seq_base = seq_base;
nack_summary->frag_base = frag_base;

// [seq_base:0 .. seq_end_p1:0) and [seq_end_p1:frag_base .. seq_end_p1:frag_end_p1) if frag_end_p1 > 0
if (seq_base > rwn->last_nack.seq_end_p1 || (seq_base == rwn->last_nack.seq_end_p1 && frag_base >= rwn->last_nack.frag_end_p1))
{
Expand Down Expand Up @@ -323,6 +323,10 @@ static enum ddsi_add_acknack_result get_acknack_info (const struct ddsi_proxy_wr
}
}

// all cases of enum ddsi_reorder_nackmap_result are covered above, so:
// result initialization is dead code (but needed because C and some compilers) and:
assert (result == AANR_ACK || result == AANR_NACK || result == AANR_SUPPRESSED_NACK);

if (result == AANR_ACK || result == AANR_SUPPRESSED_NACK)
{
// ACK and SUPPRESSED_NACK both end up being a pure ACK; send those only if we have to
Expand Down Expand Up @@ -384,6 +388,7 @@ void ddsi_sched_acknack_if_needed (struct ddsi_xevent *ev, struct ddsi_proxy_wri
}
}

#ifndef NDEBUG
static bool need_to_eventually_nack (enum ddsi_add_acknack_result aanr)
{
switch (aanr)
Expand All @@ -399,26 +404,42 @@ static bool need_to_eventually_nack (enum ddsi_add_acknack_result aanr)
}
return true;
}
#endif

static bool need_to_eventually_nack_intv (const struct ddsi_domaingv *gv, enum ddsi_add_acknack_result aanr, int64_t *intv)
static void resched_acknack_if_data_missing (struct ddsi_xevent *ev, struct ddsi_proxy_writer *pwr, struct ddsi_pwr_rd_match *rwn, ddsrt_mtime_t tnow, const enum ddsi_add_acknack_result aanr)
{
switch (aanr)
{
case AANR_SILENT_ACK:
case AANR_ACK:
assert (!need_to_eventually_nack (aanr));
return false;
// Sending something, nothing missing so no need to reschedule
break;

case AANR_SILENT_ACK:
// Not sending anything, nothing missing so no need to reschedule
break;

case AANR_NACK:
case AANR_NACKFRAG_ONLY:
*intv = gv->config.auto_resched_nack_delay;
// Sending a retransmit request now, reschedule because requesting data isn't a guarantee
// we'll get it.
(void) ddsi_resched_xevent_if_earlier (ev, ddsrt_mtime_add_duration (tnow, pwr->e.gv->config.auto_resched_nack_delay));
break;

case AANR_SILENT_NACK:
case AANR_SUPPRESSED_NACK:
*intv = gv->config.nack_delay;
case AANR_SUPPRESSED_NACK: {
// Not sending anything or only sending an ACK, despite knowing we are missing data.
//
// Rate-limit spontaneous (or "illegal") NACKs, do any further processing only if enough
// time has passed, else bail out after rescheduling it for the time at which we are
// willing to send it
const int64_t intv = pwr->e.gv->config.nack_delay;
ddsrt_mtime_t tnext = ddsrt_mtime_add_duration (rwn->t_last_nack, intv);
if (tnext.v < tnow.v)
tnext = ddsrt_mtime_add_duration (tnow, intv);
ddsi_resched_xevent_if_earlier (ev, tnext);
break;
}
}
assert (need_to_eventually_nack (aanr));
return true;
}

static struct ddsi_xmsg *make_and_resched_acknack (struct ddsi_xevent *ev, struct ddsi_proxy_writer *pwr, struct ddsi_pwr_rd_match *rwn, ddsrt_mtime_t tnow)
Expand All @@ -433,44 +454,19 @@ static struct ddsi_xmsg *make_and_resched_acknack (struct ddsi_xevent *ev, struc
tnow.v >= ddsrt_mtime_add_duration (rwn->t_last_ack, gv->config.ack_delay).v,
tnow.v >= ddsrt_mtime_add_duration (rwn->t_last_nack, gv->config.nack_delay).v);

// Trivial case: no data missing, suppressing an ACK now because of the rate
if (aanr == AANR_SILENT_ACK)
return NULL;

// Reschedule if there is data missing
if (rwn->heartbeat_since_ack || rwn->heartbeatfrag_since_ack)
{
// Responding to a heartbeat, so reschedule if there's something to NACK, then continue
// sending ACKNACK / NACK_FRAG
int64_t intv = 0;
if (need_to_eventually_nack_intv (gv, aanr, &intv))
(void) ddsi_resched_xevent_if_earlier (ev, ddsrt_mtime_add_duration (tnow, intv));
}
else
// Reschedule in cases there is data missing, bail out if not sending anything at all
resched_acknack_if_data_missing (ev, pwr, rwn, tnow, aanr);
switch (aanr)
{
// Not really allowed to send an ACKNACK by the spec, except we do it sometimes to recover
// from packet loss after an asymmetrical disconnect where the writer never has any reason
// to send a heartbeat
if (!need_to_eventually_nack (aanr))
{
// If there's nothing to NACK, don't send anything and don't reschedule
case AANR_SILENT_ACK:
case AANR_SILENT_NACK:
assert (!need_to_eventually_nack (aanr) || ddsi_xevent_is_scheduled (ev));
return NULL;
}
else
{
// Rate-limit spontaneous (or "illegal") NACKs, do any further processing only if enough
// time has passed, else bail out after rescheduling it for the time at which we are
// willing to send it
const int64_t intv = gv->config.auto_resched_nack_delay;
const ddsrt_mtime_t tsend = ddsrt_mtime_add_duration (rwn->t_last_nack, intv);
const ddsrt_mtime_t trepeat = ddsrt_mtime_add_duration (tnow, intv);
if (tnow.v < tsend.v)
{
(void) ddsi_resched_xevent_if_earlier (ev, tsend);
return NULL;
}
(void) ddsi_resched_xevent_if_earlier (ev, trepeat);
}
case AANR_ACK:
case AANR_NACK:
case AANR_NACKFRAG_ONLY:
case AANR_SUPPRESSED_NACK:
break;
}

// Committing to sending a message in response: update the state. Note that there's still a
Expand Down Expand Up @@ -529,7 +525,7 @@ static struct ddsi_xmsg *make_and_resched_acknack (struct ddsi_xevent *ev, struc
{
case AANR_SILENT_ACK:
case AANR_SILENT_NACK:
// no message: caught by the size = 0 check
// can't be reached because of early returns
assert (0);
break;
case AANR_ACK:
Expand Down
3 changes: 2 additions & 1 deletion src/core/ddsi/src/ddsi_xevent.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ int ddsi_xevent_is_scheduled (struct ddsi_xevent *ev)
struct ddsi_xeventq *evq = ev->evq;
int is_scheduled;
ddsrt_mutex_lock (&evq->lock);
is_scheduled = (ev->tsched.v != TSCHED_DELETE && ev->tsched.v != DDS_NEVER);
// Also considers it scheduled if it is about to be deleted
is_scheduled = (ev->tsched.v != DDS_NEVER);
ddsrt_mutex_unlock (&evq->lock);
return is_scheduled;
}
Expand Down

0 comments on commit d7db65c

Please sign in to comment.