Skip to content

Commit

Permalink
shmem,percpu_counter: add _limited_add(fbc, limit, amount)
Browse files Browse the repository at this point in the history
Percpu counter's compare and add are separate functions: without locking
around them (which would defeat their purpose), it has been possible to
overflow the intended limit.  Imagine all the other CPUs fallocating tmpfs
huge pages to the limit, in between this CPU's compare and its add.

I have not seen reports of that happening; but tmpfs's recent addition of
dquot_alloc_block_nodirty() in between the compare and the add makes it
even more likely, and I'd be uncomfortable to leave it unfixed.

Introduce percpu_counter_limited_add(fbc, limit, amount) to prevent it.

I believe this implementation is correct, and slightly more efficient than
the combination of compare and add (taking the lock once rather than twice
when nearing full - the last 128MiB of a tmpfs volume on a machine with
128 CPUs and 4KiB pages); but it does beg for a better design - when
nearing full, there is no new batching, but the costly percpu counter sum
across CPUs still has to be done, while locked.

Follow __percpu_counter_sum()'s example, including cpu_dying_mask as well
as cpu_online_mask: but shouldn't __percpu_counter_compare() and
__percpu_counter_limited_add() then be adding a num_dying_cpus() to
num_online_cpus(), when they calculate the maximum which could be held
across CPUs?  But the times when it matters would be vanishingly rare.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Hugh Dickins <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Darrick J. Wong <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: Carlos Maiolino <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: Chuck Lever <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
  • Loading branch information
Hugh Dickins authored and akpm00 committed Oct 4, 2023
1 parent 87556dd commit c7fd543
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 5 deletions.
23 changes: 23 additions & 0 deletions include/linux/percpu_counter.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit,
s64 amount, s32 batch);
void percpu_counter_sync(struct percpu_counter *fbc);

static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
Expand All @@ -69,6 +71,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
}

static inline bool
percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount)
{
return __percpu_counter_limited_add(fbc, limit, amount,
percpu_counter_batch);
}

/*
* With percpu_counter_add_local() and percpu_counter_sub_local(), counts
* are accumulated in local per cpu counter and not in fbc->count until
Expand Down Expand Up @@ -185,6 +194,20 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
local_irq_restore(flags);
}

static inline bool
percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount)
{
unsigned long flags;
s64 count;

local_irq_save(flags);
count = fbc->count + amount;
if (count <= limit)
fbc->count = count;
local_irq_restore(flags);
return count <= limit;
}

/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */
static inline void
percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
Expand Down
53 changes: 53 additions & 0 deletions lib/percpu_counter.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,59 @@ int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
}
EXPORT_SYMBOL(__percpu_counter_compare);

/*
* Compare counter, and add amount if the total is within limit.
* Return true if amount was added, false if it would exceed limit.
*/
bool __percpu_counter_limited_add(struct percpu_counter *fbc,
s64 limit, s64 amount, s32 batch)
{
s64 count;
s64 unknown;
unsigned long flags;
bool good;

if (amount > limit)
return false;

local_irq_save(flags);
unknown = batch * num_online_cpus();
count = __this_cpu_read(*fbc->counters);

/* Skip taking the lock when safe */
if (abs(count + amount) <= batch &&
fbc->count + unknown <= limit) {
this_cpu_add(*fbc->counters, amount);
local_irq_restore(flags);
return true;
}

raw_spin_lock(&fbc->lock);
count = fbc->count + amount;

/* Skip percpu_counter_sum() when safe */
if (count + unknown > limit) {
s32 *pcount;
int cpu;

for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) {
pcount = per_cpu_ptr(fbc->counters, cpu);
count += *pcount;
}
}

good = count <= limit;
if (good) {
count = __this_cpu_read(*fbc->counters);
fbc->count += count + amount;
__this_cpu_sub(*fbc->counters, count);
}

raw_spin_unlock(&fbc->lock);
local_irq_restore(flags);
return good;
}

static int __init percpu_counter_startup(void)
{
int ret;
Expand Down
10 changes: 5 additions & 5 deletions mm/shmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -217,15 +217,15 @@ static int shmem_inode_acct_blocks(struct inode *inode, long pages)

might_sleep(); /* when quotas */
if (sbinfo->max_blocks) {
if (percpu_counter_compare(&sbinfo->used_blocks,
sbinfo->max_blocks - pages) > 0)
if (!percpu_counter_limited_add(&sbinfo->used_blocks,
sbinfo->max_blocks, pages))
goto unacct;

err = dquot_alloc_block_nodirty(inode, pages);
if (err)
if (err) {
percpu_counter_sub(&sbinfo->used_blocks, pages);
goto unacct;

percpu_counter_add(&sbinfo->used_blocks, pages);
}
} else {
err = dquot_alloc_block_nodirty(inode, pages);
if (err)
Expand Down

0 comments on commit c7fd543

Please sign in to comment.