Skip to content

Commit

Permalink
Apply Zebra fpm backpressure patches to dplane_fpm_sonic (#21356)
Browse files Browse the repository at this point in the history
<!--
 Please make sure you've read and understood our contributing guidelines:
 https://github.com/Azure/SONiC/blob/gh-pages/CONTRIBUTING.md

 ** Make sure all your commits include a signature generated with `git commit -s` **

 If this is a bug fix, make sure your description includes "fixes #xxxx", or
 "closes #xxxx" or "resolves #xxxx"

 Please provide the following information:
-->

#### Why I did it

Reduce high CPU usage in zebra after performing a port toggle on all interfaces simultaneously.

#### How I did it

Apply zebra fpm backpressure patches from FRR mainline to dplane_fpm_sonic:
* zebra: Use built in data structure counter (FRRouting/frr#16221)
* Zebra fpm backpressure (FRRouting/frr#16220)

<!--
#### How to verify it

If the PR needs to be backported, it must be tested against the base branch and the earliest backport release branch, and the tested image version must be provided for both branches. For example, if the PR is requested for master, 202211 and 202012, then the requester needs to provide test results on master and 202012.
-->

<!--
#### Which release branch to backport (provide reason below if selected)

- Note we only backport fixes to a release branch, *not* features!
- Please also provide a reason for the backporting below.
- e.g.
- [x] 202006

- [ ] 201811
- [ ] 201911
- [ ] 202006
- [ ] 202012
- [ ] 202106
- [ ] 202111
- [ ] 202205
- [ ] 202211
- [ ] 202305

-->
<!--
#### Tested branch (Please provide the tested image version)

- Please provide tested image version
- e.g.
- [x] 20201231.100

- [ ]
- [ ]
-->

<!--
#### Description for the changelog
Write a short (one line) summary that describes the changes in this
pull request for inclusion in the changelog:
-->

<!--
 Be sure to add a label/tag for the feature raised. For example, see PR#2174 in the sonic-utilities repo, where the Generic Config and Update feature has been labelled as GCU.
-->

<!--
#### Link to config_db schema for YANG module changes
Provide a link to the config_db schema for the table for which the YANG model
is defined.
The link should point to the correct section of https://github.com/Azure/sonic-buildimage/blob/master/src/sonic-yang-models/doc/Configuration.md
-->

<!--
#### A picture of a cute animal (not mandatory but encouraged)
-->
  • Loading branch information
mssonicbld authored Jan 9, 2025
1 parent 2e5e624 commit 829dcde
Showing 1 changed file with 38 additions and 23 deletions.
61 changes: 38 additions & 23 deletions src/sonic-frr/dplane_fpm_sonic/dplane_fpm_sonic.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,6 @@ struct fpm_nl_ctx {

/* Amount of data plane context processed. */
_Atomic uint32_t dplane_contexts;
/* Amount of data plane contexts enqueued. */
_Atomic uint32_t ctxqueue_len;
/* Peak amount of data plane contexts enqueued. */
_Atomic uint32_t ctxqueue_len_peak;

Expand Down Expand Up @@ -380,6 +378,12 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd,
FPM_STR
"FPM statistic counters\n")
{
uint32_t curr_queue_len;

frr_with_mutex (&gfnc->ctxqueue_mutex) {
curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue);
}

vty_out(vty, "%30s\n%30s\n", "FPM counters", "============");

#define SHOW_COUNTER(label, counter) \
Expand All @@ -393,8 +397,7 @@ DEFUN(fpm_show_counters, fpm_show_counters_cmd,
SHOW_COUNTER("Connection errors", gfnc->counters.connection_errors);
SHOW_COUNTER("Data plane items processed",
gfnc->counters.dplane_contexts);
SHOW_COUNTER("Data plane items enqueued",
gfnc->counters.ctxqueue_len);
SHOW_COUNTER("Data plane items enqueued", curr_queue_len);
SHOW_COUNTER("Data plane items queue peak",
gfnc->counters.ctxqueue_len_peak);
SHOW_COUNTER("Buffer full hits", gfnc->counters.buffer_full);
Expand All @@ -413,6 +416,12 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd,
"FPM statistic counters\n"
JSON_STR)
{
uint32_t curr_queue_len;

frr_with_mutex (&gfnc->ctxqueue_mutex) {
curr_queue_len = dplane_ctx_queue_count(&gfnc->ctxqueue);
}

struct json_object *jo;

jo = json_object_new_object();
Expand All @@ -426,8 +435,7 @@ DEFUN(fpm_show_counters_json, fpm_show_counters_json_cmd,
gfnc->counters.connection_errors);
json_object_int_add(jo, "data-plane-contexts",
gfnc->counters.dplane_contexts);
json_object_int_add(jo, "data-plane-contexts-queue",
gfnc->counters.ctxqueue_len);
json_object_int_add(jo, "data-plane-contexts-queue", curr_queue_len);
json_object_int_add(jo, "data-plane-contexts-queue-peak",
gfnc->counters.ctxqueue_len_peak);
json_object_int_add(jo, "buffer-full-hits", gfnc->counters.buffer_full);
Expand Down Expand Up @@ -1313,7 +1321,7 @@ static ssize_t netlink_srv6_vpn_route_msg_encode(int cmd,
&encap_src_addr, IPV6_MAX_BYTELEN))
return false;
if (!nl_attr_put(&req->n, datalen, FPM_ROUTE_ENCAP_SRV6_VPN_SID,
&nexthop->nh_srv6->seg6_segs,
&nexthop->nh_srv6->seg6_segs->seg[0],
IPV6_MAX_BYTELEN))
return false;
nl_attr_nest_end(&req->n, nest);
Expand Down Expand Up @@ -1992,8 +2000,6 @@ static void fpm_process_queue(struct event *t)

/* Account the processed entries. */
processed_contexts++;
atomic_fetch_sub_explicit(&fnc->counters.ctxqueue_len, 1,
memory_order_relaxed);

dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
dplane_provider_enqueue_out_ctx(fnc->prov, ctx);
Expand Down Expand Up @@ -2162,10 +2168,29 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
struct zebra_dplane_ctx *ctx;
struct fpm_nl_ctx *fnc;
int counter, limit;
uint64_t cur_queue, peak_queue = 0, stored_peak_queue;
uint64_t cur_queue = 0, peak_queue = 0, stored_peak_queue;

fnc = dplane_provider_get_data(prov);
limit = dplane_provider_get_work_limit(prov);

frr_with_mutex (&fnc->ctxqueue_mutex) {
cur_queue = dplane_ctx_queue_count(&fnc->ctxqueue);
}

if (cur_queue >= (uint64_t)limit) {
if (IS_ZEBRA_DEBUG_FPM)
zlog_debug("%s: Already at a limit(%" PRIu64
") of internal work, hold off",
__func__, cur_queue);
limit = 0;
} else {
if (IS_ZEBRA_DEBUG_FPM)
zlog_debug("%s: current queue is %" PRIu64
", limiting to lesser amount of %" PRIu64,
__func__, cur_queue, limit - cur_queue);
limit -= cur_queue;
}

for (counter = 0; counter < limit; counter++) {
ctx = dplane_provider_dequeue_in_ctx(prov);
if (ctx == NULL)
Expand All @@ -2176,20 +2201,12 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
* anyway.
*/
if (fnc->socket != -1 && fnc->connecting == false) {
/*
* Update the number of queued contexts *before*
* enqueueing, to ensure counter consistency.
*/
atomic_fetch_add_explicit(&fnc->counters.ctxqueue_len,
1, memory_order_relaxed);

frr_with_mutex (&fnc->ctxqueue_mutex) {
dplane_ctx_enqueue_tail(&fnc->ctxqueue, ctx);
cur_queue =
dplane_ctx_queue_count(&fnc->ctxqueue);
}

cur_queue = atomic_load_explicit(
&fnc->counters.ctxqueue_len,
memory_order_relaxed);
if (peak_queue < cur_queue)
peak_queue = cur_queue;
continue;
Expand All @@ -2206,9 +2223,7 @@ static int fpm_nl_process(struct zebra_dplane_provider *prov)
atomic_store_explicit(&fnc->counters.ctxqueue_len_peak,
peak_queue, memory_order_relaxed);

if (atomic_load_explicit(&fnc->counters.ctxqueue_len,
memory_order_relaxed)
> 0)
if (cur_queue > 0)
event_add_timer(fnc->fthread->master, fpm_process_queue,
fnc, 0, &fnc->t_dequeue);

Expand Down

0 comments on commit 829dcde

Please sign in to comment.