Skip to content

Commit 22f2ac5

Browse files
hnaztorvalds
authored andcommitted
mm: workingset: fix crash in shadow node shrinker caused by replace_page_cache_page()
Antonio reports the following crash when using fuse under memory pressure: kernel BUG at /build/linux-a2WvEb/linux-4.4.0/mm/workingset.c:346! invalid opcode: 0000 [hardkernel#1] SMP Modules linked in: all of them CPU: 2 PID: 63 Comm: kswapd0 Not tainted 4.4.0-36-generic hardkernel#55-Ubuntu Hardware name: System manufacturer System Product Name/P8H67-M PRO, BIOS 3904 04/27/2013 task: ffff88040cae6040 ti: ffff880407488000 task.ti: ffff880407488000 RIP: shadow_lru_isolate+0x181/0x190 Call Trace: __list_lru_walk_one.isra.3+0x8f/0x130 list_lru_walk_one+0x23/0x30 scan_shadow_nodes+0x34/0x50 shrink_slab.part.40+0x1ed/0x3d0 shrink_zone+0x2ca/0x2e0 kswapd+0x51e/0x990 kthread+0xd8/0xf0 ret_from_fork+0x3f/0x70 which corresponds to the following sanity check in the shadow node tracking: BUG_ON(node->count & RADIX_TREE_COUNT_MASK); The workingset code tracks radix tree nodes that exclusively contain shadow entries of evicted pages in them, and this (somewhat obscure) line checks whether there are real pages left that would interfere with reclaim of the radix tree node under memory pressure. While discussing ways how fuse might sneak pages into the radix tree past the workingset code, Miklos pointed to replace_page_cache_page(), and indeed there is a problem there: it properly accounts for the old page being removed - __delete_from_page_cache() does that - but then does a raw raw radix_tree_insert(), not accounting for the replacement page. Eventually the page count bits in node->count underflow while leaving the node incorrectly linked to the shadow node LRU. To address this, make sure replace_page_cache_page() uses the tracked page insertion code, page_cache_tree_insert(). This fixes the page accounting and makes sure page-containing nodes are properly unlinked from the shadow node LRU again. Also, make the sanity checks a bit less obscure by using the helpers for checking the number of pages and shadows in a radix tree node. Fixes: 449dd69 ("mm: keep page cache radix tree nodes in check") Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Johannes Weiner <[email protected]> Reported-by: Antonio SJ Musumeci <[email protected]> Debugged-by: Miklos Szeredi <[email protected]> Cc: <[email protected]> [3.15+] Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent e3b3656 commit 22f2ac5

File tree

3 files changed

+63
-63
lines changed

3 files changed

+63
-63
lines changed

include/linux/swap.h

+2
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ static inline void workingset_node_pages_inc(struct radix_tree_node *node)
257257

258258
static inline void workingset_node_pages_dec(struct radix_tree_node *node)
259259
{
260+
VM_BUG_ON(!workingset_node_pages(node));
260261
node->count--;
261262
}
262263

@@ -272,6 +273,7 @@ static inline void workingset_node_shadows_inc(struct radix_tree_node *node)
272273

273274
static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
274275
{
276+
VM_BUG_ON(!workingset_node_shadows(node));
275277
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
276278
}
277279

mm/filemap.c

+57-57
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,62 @@
110110
* ->tasklist_lock (memory_failure, collect_procs_ao)
111111
*/
112112

113+
static int page_cache_tree_insert(struct address_space *mapping,
114+
struct page *page, void **shadowp)
115+
{
116+
struct radix_tree_node *node;
117+
void **slot;
118+
int error;
119+
120+
error = __radix_tree_create(&mapping->page_tree, page->index, 0,
121+
&node, &slot);
122+
if (error)
123+
return error;
124+
if (*slot) {
125+
void *p;
126+
127+
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
128+
if (!radix_tree_exceptional_entry(p))
129+
return -EEXIST;
130+
131+
mapping->nrexceptional--;
132+
if (!dax_mapping(mapping)) {
133+
if (shadowp)
134+
*shadowp = p;
135+
if (node)
136+
workingset_node_shadows_dec(node);
137+
} else {
138+
/* DAX can replace empty locked entry with a hole */
139+
WARN_ON_ONCE(p !=
140+
(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
141+
RADIX_DAX_ENTRY_LOCK));
142+
/* DAX accounts exceptional entries as normal pages */
143+
if (node)
144+
workingset_node_pages_dec(node);
145+
/* Wakeup waiters for exceptional entry lock */
146+
dax_wake_mapping_entry_waiter(mapping, page->index,
147+
false);
148+
}
149+
}
150+
radix_tree_replace_slot(slot, page);
151+
mapping->nrpages++;
152+
if (node) {
153+
workingset_node_pages_inc(node);
154+
/*
155+
* Don't track node that contains actual pages.
156+
*
157+
* Avoid acquiring the list_lru lock if already
158+
* untracked. The list_empty() test is safe as
159+
* node->private_list is protected by
160+
* mapping->tree_lock.
161+
*/
162+
if (!list_empty(&node->private_list))
163+
list_lru_del(&workingset_shadow_nodes,
164+
&node->private_list);
165+
}
166+
return 0;
167+
}
168+
113169
static void page_cache_tree_delete(struct address_space *mapping,
114170
struct page *page, void *shadow)
115171
{
@@ -561,7 +617,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
561617

562618
spin_lock_irqsave(&mapping->tree_lock, flags);
563619
__delete_from_page_cache(old, NULL);
564-
error = radix_tree_insert(&mapping->page_tree, offset, new);
620+
error = page_cache_tree_insert(mapping, new, NULL);
565621
BUG_ON(error);
566622
mapping->nrpages++;
567623

@@ -584,62 +640,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
584640
}
585641
EXPORT_SYMBOL_GPL(replace_page_cache_page);
586642

587-
static int page_cache_tree_insert(struct address_space *mapping,
588-
struct page *page, void **shadowp)
589-
{
590-
struct radix_tree_node *node;
591-
void **slot;
592-
int error;
593-
594-
error = __radix_tree_create(&mapping->page_tree, page->index, 0,
595-
&node, &slot);
596-
if (error)
597-
return error;
598-
if (*slot) {
599-
void *p;
600-
601-
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
602-
if (!radix_tree_exceptional_entry(p))
603-
return -EEXIST;
604-
605-
mapping->nrexceptional--;
606-
if (!dax_mapping(mapping)) {
607-
if (shadowp)
608-
*shadowp = p;
609-
if (node)
610-
workingset_node_shadows_dec(node);
611-
} else {
612-
/* DAX can replace empty locked entry with a hole */
613-
WARN_ON_ONCE(p !=
614-
(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
615-
RADIX_DAX_ENTRY_LOCK));
616-
/* DAX accounts exceptional entries as normal pages */
617-
if (node)
618-
workingset_node_pages_dec(node);
619-
/* Wakeup waiters for exceptional entry lock */
620-
dax_wake_mapping_entry_waiter(mapping, page->index,
621-
false);
622-
}
623-
}
624-
radix_tree_replace_slot(slot, page);
625-
mapping->nrpages++;
626-
if (node) {
627-
workingset_node_pages_inc(node);
628-
/*
629-
* Don't track node that contains actual pages.
630-
*
631-
* Avoid acquiring the list_lru lock if already
632-
* untracked. The list_empty() test is safe as
633-
* node->private_list is protected by
634-
* mapping->tree_lock.
635-
*/
636-
if (!list_empty(&node->private_list))
637-
list_lru_del(&workingset_shadow_nodes,
638-
&node->private_list);
639-
}
640-
return 0;
641-
}
642-
643643
static int __add_to_page_cache_locked(struct page *page,
644644
struct address_space *mapping,
645645
pgoff_t offset, gfp_t gfp_mask,

mm/workingset.c

+4-6
Original file line numberDiff line numberDiff line change
@@ -418,21 +418,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
418418
* no pages, so we expect to be able to remove them all and
419419
* delete and free the empty node afterwards.
420420
*/
421-
422-
BUG_ON(!node->count);
423-
BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
421+
BUG_ON(!workingset_node_shadows(node));
422+
BUG_ON(workingset_node_pages(node));
424423

425424
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
426425
if (node->slots[i]) {
427426
BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
428427
node->slots[i] = NULL;
429-
BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
430-
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
428+
workingset_node_shadows_dec(node);
431429
BUG_ON(!mapping->nrexceptional);
432430
mapping->nrexceptional--;
433431
}
434432
}
435-
BUG_ON(node->count);
433+
BUG_ON(workingset_node_shadows(node));
436434
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
437435
if (!__radix_tree_delete_node(&mapping->page_tree, node))
438436
BUG();

0 commit comments

Comments
 (0)