Skip to content

Commit

Permalink
mm/mglru: try to stop at high watermarks
Browse files Browse the repository at this point in the history
The initial MGLRU patchset didn't include the memcg LRU support, and it
relied on should_abort_scan(), added by commit f76c833 ("mm:
multi-gen LRU: optimize multiple memcgs"), to "backoff to avoid
overshooting their aggregate reclaim target by too much".

Later on when the memcg LRU was added, should_abort_scan() was deemed
unnecessary, and the test results [1] showed no side effects after it was
removed by commit a579086 ("mm: multi-gen LRU: remove eviction
fairness safeguard").

However, that test used memory.reclaim, which sets nr_to_reclaim to
SWAP_CLUSTER_MAX.  So it can overshoot only by SWAP_CLUSTER_MAX-1 pages,
i.e., from nr_reclaimed=nr_to_reclaim-1 to
nr_reclaimed=nr_to_reclaim+SWAP_CLUSTER_MAX-1.  Compared with the batch
size kswapd sets to nr_to_reclaim, SWAP_CLUSTER_MAX is tiny.  Therefore
that test isn't able to reproduce the worst case scenario, i.e., kswapd
overshooting GBs on large systems and "consuming 100% CPU" (see the Closes
tag).

Bring back a simplified version of should_abort_scan() on top of the memcg
LRU, so that kswapd stops when all eligible zones are above their
respective high watermarks plus a small delta to lower the chance of
KSWAPD_HIGH_WMARK_HIT_QUICKLY.  Note that this only applies to order-0
reclaim, meaning compaction-induced reclaim can still run wild (which is a
different problem).

On Android, launching 55 apps sequentially:
           Before     After      Change
  pgpgin   838377172  802955040  -4%
  pgpgout  38037080   34336300   -10%

[1] https://lore.kernel.org/[email protected]/

Link: https://lkml.kernel.org/r/[email protected]
Fixes: a579086 ("mm: multi-gen LRU: remove eviction fairness safeguard")
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: Charan Teja Kalla <[email protected]>
Reported-by: Jaroslav Pulchart <[email protected]>
Closes: https://lore.kernel.org/CAK8fFZ4DY+GtBA40Pm7Nn5xCHy+51w3sfxPqkqpqakSXYyX+Wg@mail.gmail.com/
Tested-by: Jaroslav Pulchart <[email protected]>
Tested-by: Kalesh Singh <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Kairui Song <[email protected]>
Cc: T.J. Mercier <[email protected]>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  • Loading branch information
yuzhaogoogle authored and akpm00 committed Dec 13, 2023
1 parent 0814880 commit 5095a2b
Showing 1 changed file with 28 additions and 8 deletions.
36 changes: 28 additions & 8 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -4648,20 +4648,41 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
 	return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
 }

-static unsigned long get_nr_to_reclaim(struct scan_control *sc)
+static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
 {
+	int i;
+	enum zone_watermarks mark;
+
 	/* don't abort memcg reclaim to ensure fairness */
 	if (!root_reclaim(sc))
-		return -1;
+		return false;
+
+	if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order)))
+		return true;
+
+	/* check the order to exclude compaction-induced reclaim */
+	if (!current_is_kswapd() || sc->order)
+		return false;

-	return max(sc->nr_to_reclaim, compact_gap(sc->order));
+	mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ?
+	       WMARK_PROMO : WMARK_HIGH;
+
+	for (i = 0; i <= sc->reclaim_idx; i++) {
+		struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
+		unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH;
+
+		if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0))
+			return false;
+	}
+
+	/* kswapd should abort if all eligible zones are safe */
+	return true;
 }

 static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 	long nr_to_scan;
 	unsigned long scanned = 0;
-	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
 	int swappiness = get_swappiness(lruvec, sc);

 	/* clean file folios are more likely to exist */
Expand All @@ -4683,7 +4704,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 		if (scanned >= nr_to_scan)
 			break;

-		if (sc->nr_reclaimed >= nr_to_reclaim)
+		if (should_abort_scan(lruvec, sc))
 			break;

 		cond_resched();
Expand Down Expand Up @@ -4744,7 +4765,6 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
 	struct lru_gen_folio *lrugen;
 	struct mem_cgroup *memcg;
 	const struct hlist_nulls_node *pos;
-	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);

 	bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
restart:
Expand Down Expand Up @@ -4777,7 +4797,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)

 		rcu_read_lock();

-		if (sc->nr_reclaimed >= nr_to_reclaim)
+		if (should_abort_scan(lruvec, sc))
 			break;
 	}

Expand All @@ -4788,7 +4808,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)

 	mem_cgroup_put(memcg);

-	if (sc->nr_reclaimed >= nr_to_reclaim)
+	if (!is_a_nulls(pos))
 		return;

 	/* restart if raced with lru_gen_rotate_memcg() */
Expand Down

0 comments on commit 5095a2b

Please sign in to comment.