diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index e86231a44c3de5..a0cfbfdbb27e04 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -150,31 +150,12 @@ EXPORT_SYMBOL(sg_init_one);
  */
 static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
 {
-	if (nents == SG_MAX_SINGLE_ALLOC) {
-		/*
-		 * Kmemleak doesn't track page allocations as they are not
-		 * commonly used (in a raw form) for kernel data structures.
-		 * As we chain together a list of pages and then a normal
-		 * kmalloc (tracked by kmemleak), in order to for that last
-		 * allocation not to become decoupled (and thus a
-		 * false-positive) we need to inform kmemleak of all the
-		 * intermediate allocations.
-		 */
-		void *ptr = (void *) __get_free_page(gfp_mask);
-		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
-		return ptr;
-	} else
-		return kmalloc_array(nents, sizeof(struct scatterlist),
-				     gfp_mask);
+	return kmalloc_array(nents, sizeof(struct scatterlist), gfp_mask);
 }
 
 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
 {
-	if (nents == SG_MAX_SINGLE_ALLOC) {
-		kmemleak_free(sg);
-		free_page((unsigned long) sg);
-	} else
-		kfree(sg);
+	kfree(sg);
 }
 
 /**
diff --git a/mm/compaction.c b/mm/compaction.c
index dbc9f86b19343f..6976966e2634b1 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1780,7 +1780,7 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE
  * aggressively the kernel should compact memory in the
  * background. It takes values in the range [0, 100].
  */
-static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
+static unsigned int __read_mostly sysctl_compaction_proactiveness;
 static int sysctl_extfrag_threshold = 500;
 static int __read_mostly sysctl_compact_memory;
 
diff --git a/mm/internal.h b/mm/internal.h
index a7d9e980429a55..c6f48221da1846 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -421,6 +421,7 @@ extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
+extern atomic_long_t kswapd_waiters;
 
 extern void free_unref_page(struct page *page, unsigned int order);
 extern void free_unref_page_list(struct list_head *list);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index a05e5bef3b4007..0ead8e6651df0c 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -178,6 +178,7 @@ EXPORT_SYMBOL_GPL(list_lru_isolate_move);
 unsigned long list_lru_count_one(struct list_lru *lru,
 				 int nid, struct mem_cgroup *memcg)
 {
+#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
 	struct list_lru_one *l;
 	long count;
 
@@ -190,6 +191,9 @@ unsigned long list_lru_count_one(struct list_lru *lru,
 		count = 0;
 
 	return count;
+#else
+	return READ_ONCE(lru->node[nid].lru.nr_items);
+#endif
 }
 EXPORT_SYMBOL_GPL(list_lru_count_one);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7d3460c7a480b5..bd2a12f4e04de2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -204,6 +204,8 @@ EXPORT_SYMBOL(node_states);
 
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
+atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);
+
 /*
  * A cached value of the page's pageblock's migratetype, used when the page is
  * put on a pcplist. Used to avoid the pageblock migratetype lookup when
@@ -297,7 +299,7 @@ static compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {
 
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
-static int watermark_boost_factor __read_mostly = 15000;
+static int watermark_boost_factor __read_mostly;
 static int watermark_scale_factor = 10;
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
@@ -2152,16 +2154,17 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 }
 
 /*
- * Obtain a specified number of elements from the buddy allocator, all under
- * a single hold of the lock, for efficiency. Add them to the supplied list.
- * Returns the number of new pages which were placed at *list.
+ * Obtain a specified number of elements from the buddy allocator, and relax the
+ * zone lock when needed. Add them to the supplied list. Returns the number of
+ * new pages which were placed at *list.
  */
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list,
 			int migratetype, unsigned int alloc_flags)
 {
+	const bool can_resched = !preempt_count() && !irqs_disabled();
 	unsigned long flags;
-	int i;
+	int i, last_mod = 0;
 
 	spin_lock_irqsave(&zone->lock, flags);
 	for (i = 0; i < count; ++i) {
@@ -2170,6 +2173,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		if (unlikely(page == NULL))
 			break;
 
+		/* Reschedule and ease the contention on the lock if needed */
+		if (i + 1 < count && ((can_resched && need_resched()) ||
+				      spin_needbreak(&zone->lock))) {
+			__mod_zone_page_state(zone, NR_FREE_PAGES,
+					      -((i + 1 - last_mod) << order));
+			last_mod = i + 1;
+			spin_unlock_irqrestore(&zone->lock, flags);
+			if (can_resched)
+				cond_resched();
+			spin_lock_irqsave(&zone->lock, flags);
+		}
+
 		/*
 		 * Split buddy pages returned by expand() are received here in
 		 * physical page order. The page is added to the tail of
@@ -2186,7 +2201,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 							-(1 << order));
 	}
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
+	__mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	return i;
@@ -3962,6 +3977,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned int cpuset_mems_cookie;
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
+	bool woke_kswapd = false;
 
 restart:
 	compaction_retries = 0;
@@ -4001,8 +4017,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 			goto nopage;
 	}
 
-	if (alloc_flags & ALLOC_KSWAPD)
+	if (alloc_flags & ALLOC_KSWAPD) {
+		if (!woke_kswapd) {
+			atomic_long_inc(&kswapd_waiters);
+			woke_kswapd = true;
+		}
 		wake_all_kswapds(order, gfp_mask, ac);
+	}
 
 	/*
 	 * The adjusted alloc_flags might result in immediate success, so try
@@ -4217,9 +4238,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto retry;
 	}
 fail:
-	warn_alloc(gfp_mask, ac->nodemask,
-			"page allocation failure: order:%u", order);
 got_pg:
+	if (woke_kswapd)
+		atomic_long_dec(&kswapd_waiters);
+	if (!page)
+		warn_alloc(gfp_mask, ac->nodemask,
+			   "page allocation failure: order:%u", order);
 	return page;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f76aa82682152e..27ad8ce5b74988 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6901,7 +6901,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	return 0;
 }
 
-static bool allow_direct_reclaim(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd)
 {
 	struct zone *zone;
 	unsigned long pfmemalloc_reserve = 0;
@@ -6930,6 +6930,10 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)
 
 	wmark_ok = free_pages > pfmemalloc_reserve / 2;
 
+	/* The throttled direct reclaimer is now a kswapd waiter */
+	if (unlikely(!using_kswapd && !wmark_ok))
+		atomic_long_inc(&kswapd_waiters);
+
 	/* kswapd must be awake if processes are being throttled */
 	if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
 		if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
@@ -6995,7 +6999,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 
 		/* Throttle based on the first usable node */
 		pgdat = zone->zone_pgdat;
-		if (allow_direct_reclaim(pgdat))
+		if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM))
 			goto out;
 		break;
 	}
@@ -7017,11 +7021,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	 */
 	if (!(gfp_mask & __GFP_FS))
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
-			allow_direct_reclaim(pgdat), HZ);
+			allow_direct_reclaim(pgdat, true), HZ);
 	else
 		/* Throttle until kswapd wakes the process */
 		wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
-			allow_direct_reclaim(pgdat));
+			allow_direct_reclaim(pgdat, true));
+
+	if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM)))
+		atomic_long_dec(&kswapd_waiters);
 
 	if (fatal_signal_pending(current))
 		return true;
@@ -7519,14 +7526,15 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 		 * able to safely make forward progress. Wake them
 		 */
 		if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
-				allow_direct_reclaim(pgdat))
+				allow_direct_reclaim(pgdat, true))
 			wake_up_all(&pgdat->pfmemalloc_wait);
 
 		/* Check if kswapd should be suspending */
 		__fs_reclaim_release(_THIS_IP_);
 		ret = try_to_freeze();
 		__fs_reclaim_acquire(_THIS_IP_);
-		if (ret || kthread_should_stop())
+		if (ret || kthread_should_stop() ||
+		    !atomic_long_read(&kswapd_waiters))
 			break;
 
 		/*
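
Illustrative note, not part of the patch: the kswapd_waiters hunks above reduce to a simple pattern. Every task that depends on kswapd holds a reference in an atomic counter, and kswapd's balancing loop in balance_pgdat() parks itself as soon as the counter reads zero. The standalone userspace sketch below shows that pattern with C11 atomics and pthreads; the names (waiters, background_worker, request_reclaim) and the polling loop are illustrative stand-ins, not kernel APIs.

/* build: cc -pthread waiters.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_long waiters = 0;		/* plays the role of kswapd_waiters */
static atomic_int  work_left = 5;	/* stand-in for "pages left to reclaim" */

/* Background thread: keeps working only while somebody is waiting on it. */
static void *background_worker(void *arg)
{
	(void)arg;
	while (atomic_load(&waiters) > 0 && atomic_fetch_sub(&work_left, 1) > 0)
		usleep(1000);		/* pretend to reclaim a batch */
	puts("worker: no waiters (or no work) left, parking");
	return NULL;
}

/* Allocator side: count ourselves as a waiter for as long as we care. */
static void request_reclaim(void)
{
	pthread_t worker;

	atomic_fetch_add(&waiters, 1);	/* cf. the woke_kswapd bookkeeping */
	pthread_create(&worker, NULL, background_worker, NULL);

	usleep(3000);			/* make progress on our own meanwhile */

	atomic_fetch_sub(&waiters, 1);	/* done: let the worker park */
	pthread_join(worker, NULL);
}

int main(void)
{
	request_reclaim();
	return 0;
}

The kernel side does the same accounting in three places: __alloc_pages_slowpath() holds a reference for the whole slowpath once it has woken kswapd, allow_direct_reclaim() adds throttled direct reclaimers that cannot use kswapd themselves, and balance_pgdat() checks the counter at the bottom of its loop instead of polling.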
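
Likewise illustrative only: the rmqueue_bulk() hunks drop and retake the zone lock mid-batch when another task needs the CPU or the lock, and they defer the NR_FREE_PAGES accounting so the counter is only brought up to date at each unlock point (that is what last_mod tracks). Below is a minimal userspace analogue of the "relax the lock while bulk-processing" part, using a pthread mutex and sched_yield() in place of the zone spinlock and cond_resched(); the names (pool, grab_batch, RELAX_EVERY) are hypothetical, and the fixed stride stands in for the kernel's need_resched()/spin_needbreak() checks.

/* build: cc -pthread relax.c */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define RELAX_EVERY 64	/* drop the lock every N items */

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static long pool = 1 << 20;	/* shared "free page" pool */

/* Pull up to 'count' items from the pool, easing contention mid-batch. */
static long grab_batch(long count)
{
	long taken = 0;

	pthread_mutex_lock(&pool_lock);
	for (long i = 0; i < count && pool > 0; i++) {
		pool--;
		taken++;
		/* Give waiters a chance before continuing with the batch. */
		if ((i + 1) % RELAX_EVERY == 0 && i + 1 < count) {
			pthread_mutex_unlock(&pool_lock);
			sched_yield();	/* analogous to cond_resched() */
			pthread_mutex_lock(&pool_lock);
		}
	}
	pthread_mutex_unlock(&pool_lock);

	return taken;
}

int main(void)
{
	printf("grabbed %ld items\n", grab_batch(1024));
	return 0;
}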