Commit 14b4687

hnaz authored and torvalds committed
mm: workingset: move shadow entry tracking to radix tree exceptional tracking
Currently, we track the shadow entries in the page cache in the upper bits of the radix_tree_node->count, behind the back of the radix tree implementation. Because the radix tree code has no awareness of them, we rely on random subtleties throughout the implementation (such as the node->count != 1 check in the shrinking code, which is meant to exclude multi-entry nodes but also happens to skip nodes with only one shadow entry, as that's accounted in the upper bits). This is error prone and has, in fact, caused the bug fixed in d3798ae ("mm: filemap: don't plant shadow entries without radix tree node").

To remove these subtleties, this patch moves shadow entry tracking from the upper bits of node->count to the existing counter for exceptional entries. node->count goes back to being a simple counter of valid entries in the tree node and can be shrunk to a single byte.

This vastly simplifies the page cache code. All accounting happens natively inside the radix tree implementation, and maintaining the LRU linkage of shadow nodes is consolidated into a single function in the workingset code that is called for leaf nodes affected by a change in the page cache tree.

This also removes the last user of the __radix_tree_delete_node() return value. Eliminate it.

Link: http://lkml.kernel.org/r/20161117193211.GE23430@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 4d693d0 commit 14b4687
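
The tracking rule at the center of this change is simple: a leaf node belongs on the shadow-node LRU exactly when it is non-empty and every entry it holds is a shadow (exceptional) entry, i.e. node->count == node->exceptional. As a minimal illustration, here is a standalone userspace C sketch of that predicate; node_counters and should_track_on_shadow_lru are hypothetical names for this sketch, not kernel API, and the real decision (including the list_lru add/del and the DAX/shmem exclusions) lives in workingset_update_node() in the mm/workingset.c diff below:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the two counters kept in struct radix_tree_node. */
struct node_counters {
        unsigned char count;       /* total valid entries in the node */
        unsigned char exceptional; /* shadow (exceptional) entries among them */
};

/*
 * Mirrors the decision workingset_update_node() makes: track a node on
 * the shadow LRU only when it is non-empty and contains nothing but
 * shadow entries; any other state means it must be (or stay) untracked.
 */
static bool should_track_on_shadow_lru(const struct node_counters *n)
{
        return n->count && n->count == n->exceptional;
}

int main(void)
{
        struct node_counters mixed   = { .count = 5, .exceptional = 2 };
        struct node_counters shadows = { .count = 3, .exceptional = 3 };
        struct node_counters empty   = { .count = 0, .exceptional = 0 };

        printf("pages+shadows tracked? %d\n", should_track_on_shadow_lru(&mixed));   /* 0 */
        printf("shadows only tracked?  %d\n", should_track_on_shadow_lru(&shadows)); /* 1 */
        printf("empty node tracked?    %d\n", should_track_on_shadow_lru(&empty));   /* 0 */
        return 0;
}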

6 files changed (+60, -138 lines)

include/linux/radix-tree.h
+2 -6

@@ -80,14 +80,10 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
                                           RADIX_TREE_MAP_SHIFT))
 
-/* Internally used bits of node->count */
-#define RADIX_TREE_COUNT_SHIFT (RADIX_TREE_MAP_SHIFT + 1)
-#define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1)
-
 struct radix_tree_node {
         unsigned char   shift;          /* Bits remaining in each slot */
         unsigned char   offset;         /* Slot offset in parent */
-        unsigned int    count;          /* Total entry count */
+        unsigned char   count;          /* Total entry count */
         unsigned char   exceptional;    /* Exceptional entry count */
         union {
                 struct {
@@ -270,7 +266,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
                         radix_tree_update_node_t update_node, void *private);
 void radix_tree_replace_slot(struct radix_tree_root *root,
                              void **slot, void *item);
-bool __radix_tree_delete_node(struct radix_tree_root *root,
+void __radix_tree_delete_node(struct radix_tree_root *root,
                               struct radix_tree_node *node);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);

include/linux/swap.h
+1 -33

@@ -246,39 +246,7 @@ struct swap_info_struct {
 void *workingset_eviction(struct address_space *mapping, struct page *page);
 bool workingset_refault(void *shadow);
 void workingset_activation(struct page *page);
-extern struct list_lru workingset_shadow_nodes;
-
-static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
-{
-        return node->count & RADIX_TREE_COUNT_MASK;
-}
-
-static inline void workingset_node_pages_inc(struct radix_tree_node *node)
-{
-        node->count++;
-}
-
-static inline void workingset_node_pages_dec(struct radix_tree_node *node)
-{
-        VM_WARN_ON_ONCE(!workingset_node_pages(node));
-        node->count--;
-}
-
-static inline unsigned int workingset_node_shadows(struct radix_tree_node *node)
-{
-        return node->count >> RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_inc(struct radix_tree_node *node)
-{
-        node->count += 1U << RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
-{
-        VM_WARN_ON_ONCE(!workingset_node_shadows(node));
-        node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-}
+void workingset_update_node(struct radix_tree_node *node, void *private);
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;

lib/radix-tree.c
+6 -19

@@ -541,12 +541,10 @@ static int radix_tree_extend(struct radix_tree_root *root,
  * radix_tree_shrink - shrink radix tree to minimum height
  * @root radix tree root
  */
-static inline bool radix_tree_shrink(struct radix_tree_root *root,
+static inline void radix_tree_shrink(struct radix_tree_root *root,
                                      radix_tree_update_node_t update_node,
                                      void *private)
 {
-        bool shrunk = false;
-
         for (;;) {
                 struct radix_tree_node *node = root->rnode;
                 struct radix_tree_node *child;
@@ -606,26 +604,20 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
                 }
 
                 radix_tree_node_free(node);
-                shrunk = true;
         }
-
-        return shrunk;
 }
 
-static bool delete_node(struct radix_tree_root *root,
+static void delete_node(struct radix_tree_root *root,
                         struct radix_tree_node *node,
                         radix_tree_update_node_t update_node, void *private)
 {
-        bool deleted = false;
-
         do {
                 struct radix_tree_node *parent;
 
                 if (node->count) {
                         if (node == entry_to_node(root->rnode))
-                                deleted |= radix_tree_shrink(root, update_node,
-                                                             private);
-                        return deleted;
+                                radix_tree_shrink(root, update_node, private);
+                        return;
                 }
 
                 parent = node->parent;
@@ -638,12 +630,9 @@ static bool delete_node(struct radix_tree_root *root,
                 }
 
                 radix_tree_node_free(node);
-                deleted = true;
 
                 node = parent;
         } while (node);
-
-        return deleted;
 }
 
 /**
@@ -1595,13 +1584,11 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
  * After clearing the slot at @index in @node from radix tree
  * rooted at @root, call this function to attempt freeing the
  * node and shrinking the tree.
- *
- * Returns %true if @node was freed, %false otherwise.
  */
-bool __radix_tree_delete_node(struct radix_tree_root *root,
+void __radix_tree_delete_node(struct radix_tree_root *root,
                               struct radix_tree_node *node)
 {
-        return delete_node(root, node, NULL, NULL);
+        delete_node(root, node, NULL, NULL);
 }
 
 static inline void delete_sibling_entries(struct radix_tree_node *node,

mm/filemap.c
+5 -49

@@ -132,37 +132,19 @@ static int page_cache_tree_insert(struct address_space *mapping,
                 if (!dax_mapping(mapping)) {
                         if (shadowp)
                                 *shadowp = p;
-                        if (node)
-                                workingset_node_shadows_dec(node);
                 } else {
                         /* DAX can replace empty locked entry with a hole */
                         WARN_ON_ONCE(p !=
                                      (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
                                               RADIX_DAX_ENTRY_LOCK));
-                        /* DAX accounts exceptional entries as normal pages */
-                        if (node)
-                                workingset_node_pages_dec(node);
                         /* Wakeup waiters for exceptional entry lock */
                         dax_wake_mapping_entry_waiter(mapping, page->index,
                                                       false);
                 }
         }
-        radix_tree_replace_slot(&mapping->page_tree, slot, page);
+        __radix_tree_replace(&mapping->page_tree, node, slot, page,
+                             workingset_update_node, mapping);
         mapping->nrpages++;
-        if (node) {
-                workingset_node_pages_inc(node);
-                /*
-                 * Don't track node that contains actual pages.
-                 *
-                 * Avoid acquiring the list_lru lock if already
-                 * untracked.  The list_empty() test is safe as
-                 * node->private_list is protected by
-                 * mapping->tree_lock.
-                 */
-                if (!list_empty(&node->private_list))
-                        list_lru_del(&workingset_shadow_nodes,
-                                     &node->private_list);
-        }
         return 0;
 }
 
@@ -185,8 +167,6 @@ static void page_cache_tree_delete(struct address_space *mapping,
                 __radix_tree_lookup(&mapping->page_tree, page->index + i,
                                     &node, &slot);
 
-                radix_tree_clear_tags(&mapping->page_tree, node, slot);
-
                 if (!node) {
                         VM_BUG_ON_PAGE(nr != 1, page);
                         /*
@@ -196,33 +176,9 @@
                         shadow = NULL;
                 }
 
-                radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
-
-                if (!node)
-                        break;
-
-                workingset_node_pages_dec(node);
-                if (shadow)
-                        workingset_node_shadows_inc(node);
-                else
-                        if (__radix_tree_delete_node(&mapping->page_tree, node))
-                                continue;
-
-                /*
-                 * Track node that only contains shadow entries. DAX mappings
-                 * contain no shadow entries and may contain other exceptional
-                 * entries so skip those.
-                 *
-                 * Avoid acquiring the list_lru lock if already tracked.
-                 * The list_empty() test is safe as node->private_list is
-                 * protected by mapping->tree_lock.
-                 */
-                if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
-                    list_empty(&node->private_list)) {
-                        node->private_data = mapping;
-                        list_lru_add(&workingset_shadow_nodes,
-                                     &node->private_list);
-                }
+                radix_tree_clear_tags(&mapping->page_tree, node, slot);
+                __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
+                                     workingset_update_node, mapping);
         }
 
         if (shadow) {

mm/truncate.c
+3 -18

@@ -44,28 +44,13 @@ static void clear_exceptional_entry(struct address_space *mapping,
          * without the tree itself locked.  These unlocked entries
          * need verification under the tree lock.
          */
-        if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
-                                 &slot))
+        if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
                 goto unlock;
         if (*slot != entry)
                 goto unlock;
-        radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
+        __radix_tree_replace(&mapping->page_tree, node, slot, NULL,
+                             workingset_update_node, mapping);
         mapping->nrexceptional--;
-        if (!node)
-                goto unlock;
-        workingset_node_shadows_dec(node);
-        /*
-         * Don't track node without shadow entries.
-         *
-         * Avoid acquiring the list_lru lock if already untracked.
-         * The list_empty() test is safe as node->private_list is
-         * protected by mapping->tree_lock.
-         */
-        if (!workingset_node_shadows(node) &&
-            !list_empty(&node->private_list))
-                list_lru_del(&workingset_shadow_nodes,
-                             &node->private_list);
-        __radix_tree_delete_node(&mapping->page_tree, node);
 unlock:
         spin_unlock_irq(&mapping->tree_lock);
 }

mm/workingset.c
+43 -13

@@ -10,6 +10,7 @@
 #include <linux/atomic.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/dax.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -334,18 +335,45 @@ void workingset_activation(struct page *page)
  * point where they would still be useful.
  */
 
-struct list_lru workingset_shadow_nodes;
+static struct list_lru shadow_nodes;
+
+void workingset_update_node(struct radix_tree_node *node, void *private)
+{
+        struct address_space *mapping = private;
+
+        /* Only regular page cache has shadow entries */
+        if (dax_mapping(mapping) || shmem_mapping(mapping))
+                return;
+
+        /*
+         * Track non-empty nodes that contain only shadow entries;
+         * unlink those that contain pages or are being freed.
+         *
+         * Avoid acquiring the list_lru lock when the nodes are
+         * already where they should be. The list_empty() test is safe
+         * as node->private_list is protected by &mapping->tree_lock.
+         */
+        if (node->count && node->count == node->exceptional) {
+                if (list_empty(&node->private_list)) {
+                        node->private_data = mapping;
+                        list_lru_add(&shadow_nodes, &node->private_list);
+                }
+        } else {
+                if (!list_empty(&node->private_list))
+                        list_lru_del(&shadow_nodes, &node->private_list);
+        }
+}
 
 static unsigned long count_shadow_nodes(struct shrinker *shrinker,
                                         struct shrink_control *sc)
 {
-        unsigned long shadow_nodes;
         unsigned long max_nodes;
+        unsigned long nodes;
         unsigned long pages;
 
         /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
         local_irq_disable();
-        shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
+        nodes = list_lru_shrink_count(&shadow_nodes, sc);
         local_irq_enable();
 
         if (sc->memcg) {
@@ -372,10 +400,10 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
          */
         max_nodes = pages >> (1 + RADIX_TREE_MAP_SHIFT - 3);
 
-        if (shadow_nodes <= max_nodes)
+        if (nodes <= max_nodes)
                 return 0;
 
-        return shadow_nodes - max_nodes;
+        return nodes - max_nodes;
 }
 
 static enum lru_status shadow_lru_isolate(struct list_head *item,
@@ -418,22 +446,25 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
          * no pages, so we expect to be able to remove them all and
          * delete and free the empty node afterwards.
          */
-        if (WARN_ON_ONCE(!workingset_node_shadows(node)))
+        if (WARN_ON_ONCE(!node->exceptional))
                 goto out_invalid;
-        if (WARN_ON_ONCE(workingset_node_pages(node)))
+        if (WARN_ON_ONCE(node->count != node->exceptional))
                 goto out_invalid;
         for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
                 if (node->slots[i]) {
                         if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
                                 goto out_invalid;
+                        if (WARN_ON_ONCE(!node->exceptional))
+                                goto out_invalid;
                         if (WARN_ON_ONCE(!mapping->nrexceptional))
                                 goto out_invalid;
                         node->slots[i] = NULL;
-                        workingset_node_shadows_dec(node);
+                        node->exceptional--;
+                        node->count--;
                         mapping->nrexceptional--;
                 }
         }
-        if (WARN_ON_ONCE(workingset_node_shadows(node)))
+        if (WARN_ON_ONCE(node->exceptional))
                 goto out_invalid;
         inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
         __radix_tree_delete_node(&mapping->page_tree, node);
@@ -456,8 +487,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 
         /* list_lru lock nests inside IRQ-safe mapping->tree_lock */
         local_irq_disable();
-        ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
-                                   shadow_lru_isolate, NULL);
+        ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
         local_irq_enable();
         return ret;
 }
@@ -496,15 +526,15 @@ static int __init workingset_init(void)
         pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
                 timestamp_bits, max_order, bucket_order);
 
-        ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
+        ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
         if (ret)
                 goto err;
         ret = register_shrinker(&workingset_shadow_shrinker);
         if (ret)
                 goto err_list_lru;
         return 0;
 err_list_lru:
-        list_lru_destroy(&workingset_shadow_nodes);
+        list_lru_destroy(&shadow_nodes);
 err:
         return ret;
 }
