Skip to content

Commit

Permalink
mm: add PageWaiters indicating tasks are waiting for a page bit
Browse files Browse the repository at this point in the history
Add a new page flag, PageWaiters, to indicate the page waitqueue has
tasks waiting. This can be tested rather than testing waitqueue_active
which requires another cacheline load.

This bit is always set when the page has tasks on page_waitqueue(page),
and is set and cleared under the waitqueue lock. It may be set when
there are no tasks on the waitqueue, which will cause a harmless extra
wakeup check that will clears the bit.

The generic bit-waitqueue infrastructure is no longer used for pages.
Instead, waitqueues are used directly with a custom key type. The
generic code was not flexible enough to have PageWaiters manipulation
under the waitqueue lock (which simplifies concurrency).

This improves the performance of page lock intensive microbenchmarks by
2-3%.

Putting two bits in the same word opens the opportunity to remove the
memory barrier between clearing the lock bit and testing the waiters
bit, after some work on the arch primitives (e.g., ensuring memory
operand widths match and cover both bits).

Signed-off-by: Nicholas Piggin <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Bob Peterson <[email protected]>
Cc: Steven Whitehouse <[email protected]>
Cc: Andrew Lutomirski <[email protected]>
Cc: Andreas Gruenbacher <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Mel Gorman <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
npiggin authored and torvalds committed Dec 25, 2016
1 parent 6326fec commit 6290602
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 50 deletions.
2 changes: 2 additions & 0 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1758,6 +1758,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
return ptl;
}

extern void __init pagecache_init(void);

extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
Expand Down
9 changes: 9 additions & 0 deletions include/linux/page-flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
*/
enum pageflags {
PG_locked, /* Page is locked. Don't touch. */
PG_waiters, /* Page has waiters, check its waitqueue */
PG_error,
PG_referenced,
PG_uptodate,
Expand Down Expand Up @@ -169,6 +170,9 @@ static __always_inline int PageCompound(struct page *page)
* for compound page all operations related to the page flag applied to
* head page.
*
* PF_ONLY_HEAD:
* for compound page, callers only ever operate on the head page.
*
* PF_NO_TAIL:
* modifications of the page flag must be done on small or head pages,
* checks can be done on tail pages too.
Expand All @@ -178,6 +182,9 @@ static __always_inline int PageCompound(struct page *page)
*/
#define PF_ANY(page, enforce) page
#define PF_HEAD(page, enforce) compound_head(page)
#define PF_ONLY_HEAD(page, enforce) ({ \
VM_BUG_ON_PGFLAGS(PageTail(page), page); \
page;})
#define PF_NO_TAIL(page, enforce) ({ \
VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
compound_head(page);})
Expand Down Expand Up @@ -255,6 +262,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)

__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
PAGEFLAG(Referenced, referenced, PF_HEAD)
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
Expand Down Expand Up @@ -743,6 +751,7 @@ static inline int page_has_private(struct page *page)

#undef PF_ANY
#undef PF_HEAD
#undef PF_ONLY_HEAD
#undef PF_NO_TAIL
#undef PF_NO_COMPOUND
#endif /* !__GENERATING_BOUNDS_H */
Expand Down
23 changes: 11 additions & 12 deletions include/linux/pagemap.h
Original file line number Diff line number Diff line change
Expand Up @@ -486,22 +486,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
* and for filesystems which need to wait on PG_private.
*/
extern void wait_on_page_bit(struct page *page, int bit_nr);

extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
extern int wait_on_page_bit_killable_timeout(struct page *page,
int bit_nr, unsigned long timeout);

static inline int wait_on_page_locked_killable(struct page *page)
{
if (!PageLocked(page))
return 0;
return wait_on_page_bit_killable(compound_head(page), PG_locked);
}
extern void wake_up_page_bit(struct page *page, int bit_nr);

extern wait_queue_head_t *page_waitqueue(struct page *page);
static inline void wake_up_page(struct page *page, int bit)
{
__wake_up_bit(page_waitqueue(page), &page->flags, bit);
if (!PageWaiters(page))
return;
wake_up_page_bit(page, bit);
}

/*
Expand All @@ -517,6 +509,13 @@ static inline void wait_on_page_locked(struct page *page)
wait_on_page_bit(compound_head(page), PG_locked);
}

static inline int wait_on_page_locked_killable(struct page *page)
{
if (!PageLocked(page))
return 0;
return wait_on_page_bit_killable(compound_head(page), PG_locked);
}

/*
* Wait for a page to complete writeback
*/
Expand Down
1 change: 0 additions & 1 deletion include/linux/writeback.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);

void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
void page_writeback_init(void);
void balance_dirty_pages_ratelimited(struct address_space *mapping);
bool wb_over_bg_thresh(struct bdi_writeback *wb);

Expand Down
1 change: 1 addition & 0 deletions include/trace/events/mmflags.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@

#define __def_pageflag_names \
{1UL << PG_locked, "locked" }, \
{1UL << PG_waiters, "waiters" }, \
{1UL << PG_error, "error" }, \
{1UL << PG_referenced, "referenced" }, \
{1UL << PG_uptodate, "uptodate" }, \
Expand Down
3 changes: 1 addition & 2 deletions init/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -647,9 +647,8 @@ asmlinkage __visible void __init start_kernel(void)
security_init();
dbg_late_init();
vfs_caches_init();
pagecache_init();
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
proc_root_init();
nsfs_init();
cpuset_init();
Expand Down
181 changes: 146 additions & 35 deletions mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -739,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc);
* at a cost of "thundering herd" phenomena during rare hash
* collisions.
*/
wait_queue_head_t *page_waitqueue(struct page *page)
#define PAGE_WAIT_TABLE_BITS 8
#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;

static wait_queue_head_t *page_waitqueue(struct page *page)
{
return bit_waitqueue(page, 0);
return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
}
EXPORT_SYMBOL(page_waitqueue);

void wait_on_page_bit(struct page *page, int bit_nr)
void __init pagecache_init(void)
{
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
int i;

if (test_bit(bit_nr, &page->flags))
__wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
TASK_UNINTERRUPTIBLE);
for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
init_waitqueue_head(&page_wait_table[i]);

page_writeback_init();
}
EXPORT_SYMBOL(wait_on_page_bit);

int wait_on_page_bit_killable(struct page *page, int bit_nr)
struct wait_page_key {
struct page *page;
int bit_nr;
int page_match;
};

struct wait_page_queue {
struct page *page;
int bit_nr;
wait_queue_t wait;
};

static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
{
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
struct wait_page_key *key = arg;
struct wait_page_queue *wait_page
= container_of(wait, struct wait_page_queue, wait);

if (wait_page->page != key->page)
return 0;
key->page_match = 1;

if (!test_bit(bit_nr, &page->flags))
if (wait_page->bit_nr != key->bit_nr)
return 0;
if (test_bit(key->bit_nr, &key->page->flags))
return 0;

return __wait_on_bit(page_waitqueue(page), &wait,
bit_wait_io, TASK_KILLABLE);
return autoremove_wake_function(wait, mode, sync, key);
}

int wait_on_page_bit_killable_timeout(struct page *page,
int bit_nr, unsigned long timeout)
void wake_up_page_bit(struct page *page, int bit_nr)
{
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
wait_queue_head_t *q = page_waitqueue(page);
struct wait_page_key key;
unsigned long flags;

wait.key.timeout = jiffies + timeout;
if (!test_bit(bit_nr, &page->flags))
return 0;
return __wait_on_bit(page_waitqueue(page), &wait,
bit_wait_io_timeout, TASK_KILLABLE);
key.page = page;
key.bit_nr = bit_nr;
key.page_match = 0;

spin_lock_irqsave(&q->lock, flags);
__wake_up_locked_key(q, TASK_NORMAL, &key);
/*
* It is possible for other pages to have collided on the waitqueue
* hash, so in that case check for a page match. That prevents a long-
* term waiter
*
* It is still possible to miss a case here, when we woke page waiters
* and removed them from the waitqueue, but there are still other
* page waiters.
*/
if (!waitqueue_active(q) || !key.page_match) {
ClearPageWaiters(page);
/*
* It's possible to miss clearing Waiters here, when we woke
* our page waiters, but the hashed waitqueue has waiters for
* other pages on it.
*
* That's okay, it's a rare case. The next waker will clear it.
*/
}
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(wake_up_page_bit);

static inline int wait_on_page_bit_common(wait_queue_head_t *q,
struct page *page, int bit_nr, int state, bool lock)
{
struct wait_page_queue wait_page;
wait_queue_t *wait = &wait_page.wait;
int ret = 0;

init_wait(wait);
wait->func = wake_page_function;
wait_page.page = page;
wait_page.bit_nr = bit_nr;

for (;;) {
spin_lock_irq(&q->lock);

if (likely(list_empty(&wait->task_list))) {
if (lock)
__add_wait_queue_tail_exclusive(q, wait);
else
__add_wait_queue(q, wait);
SetPageWaiters(page);
}

set_current_state(state);

spin_unlock_irq(&q->lock);

if (likely(test_bit(bit_nr, &page->flags))) {
io_schedule();
if (unlikely(signal_pending_state(state, current))) {
ret = -EINTR;
break;
}
}

if (lock) {
if (!test_and_set_bit_lock(bit_nr, &page->flags))
break;
} else {
if (!test_bit(bit_nr, &page->flags))
break;
}
}

finish_wait(q, wait);

/*
* A signal could leave PageWaiters set. Clearing it here if
* !waitqueue_active would be possible (by open-coding finish_wait),
* but still fail to catch it in the case of wait hash collision. We
* already can fail to clear wait hash collision cases, so don't
* bother with signals either.
*/

return ret;
}

void wait_on_page_bit(struct page *page, int bit_nr)
{
wait_queue_head_t *q = page_waitqueue(page);
wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
}
EXPORT_SYMBOL(wait_on_page_bit);

int wait_on_page_bit_killable(struct page *page, int bit_nr)
{
wait_queue_head_t *q = page_waitqueue(page);
return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
}
EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);

/**
* add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
Expand All @@ -793,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)

spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, waiter);
SetPageWaiters(page);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL_GPL(add_page_wait_queue);
Expand Down Expand Up @@ -874,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio);
* __lock_page - get a lock on the page, assuming we need to sleep to get it
* @page: the page to lock
*/
void __lock_page(struct page *page)
void __lock_page(struct page *__page)
{
struct page *page_head = compound_head(page);
DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);

__wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
TASK_UNINTERRUPTIBLE);
struct page *page = compound_head(__page);
wait_queue_head_t *q = page_waitqueue(page);
wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
}
EXPORT_SYMBOL(__lock_page);

int __lock_page_killable(struct page *page)
int __lock_page_killable(struct page *__page)
{
struct page *page_head = compound_head(page);
DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);

return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
bit_wait_io, TASK_KILLABLE);
struct page *page = compound_head(__page);
wait_queue_head_t *q = page_waitqueue(page);
return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
}
EXPORT_SYMBOL_GPL(__lock_page_killable);

Expand Down
2 changes: 2 additions & 0 deletions mm/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)

void page_writeback_init(void);

int do_swap_page(struct vm_fault *vmf);

void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
Expand Down
2 changes: 2 additions & 0 deletions mm/swap.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ static void __page_cache_release(struct page *page)
del_page_from_lru_list(page, lruvec, page_off_lru(page));
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
}
__ClearPageWaiters(page);
mem_cgroup_uncharge(page);
}

Expand Down Expand Up @@ -784,6 +785,7 @@ void release_pages(struct page **pages, int nr, bool cold)

/* Clear Active bit in case of parallel mark_page_accessed */
__ClearPageActive(page);
__ClearPageWaiters(page);

list_add(&page->lru, &pages_to_free);
}
Expand Down

0 comments on commit 6290602

Please sign in to comment.