Skip to content

Commit

Permalink
padata: add basic support for multithreaded jobs
Browse files Browse the repository at this point in the history
Sometimes the kernel doesn't take full advantage of system memory
bandwidth, leading to a single CPU spending excessive time in
initialization paths where the data scales with memory size.

Multithreading naturally addresses this problem.

Extend padata, a framework that handles many parallel yet singlethreaded
jobs, to also handle multithreaded jobs by adding support for splitting up
the work evenly, specifying a minimum amount of work that's appropriate
for one helper thread to do, load balancing between helpers, and
coordinating them.

This is inspired by work from Pavel Tatashin and Steve Sistare.

Signed-off-by: Daniel Jordan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Tested-by: Josh Triplett <[email protected]>
Cc: Alexander Duyck <[email protected]>
Cc: Alex Williamson <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Kirill Tkhai <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: Pavel Tatashin <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Randy Dunlap <[email protected]>
Cc: Robert Elliott <[email protected]>
Cc: Shile Zhang <[email protected]>
Cc: Steffen Klassert <[email protected]>
Cc: Steven Sistare <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Zi Yan <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
danieljordan10 authored and torvalds committed Jun 4, 2020
1 parent 4611ce2 commit 004ed42
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 3 deletions.
29 changes: 29 additions & 0 deletions include/linux/padata.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
*
* Copyright (C) 2008, 2009 secunet Security Networks AG
* Copyright (C) 2008, 2009 Steffen Klassert <[email protected]>
*
* Copyright (c) 2020 Oracle and/or its affiliates.
* Author: Daniel Jordan <[email protected]>
*/

#ifndef PADATA_H
Expand Down Expand Up @@ -130,6 +133,31 @@ struct padata_shell {
struct list_head list;
};

/**
 * struct padata_mt_job - represents one multithreaded job
 *
 * Filled in by the client and passed to padata_do_multithreaded().  When the
 * job is split between several helpers, @start and @size are updated in place
 * as chunks are handed out, so the caller should not rely on their values
 * after the call returns.
 *
 * @thread_fn: Called for each chunk of work that a padata thread does.  It
 *             receives the chunk as the half-open range [start, end) plus
 *             @fn_arg; the next chunk begins where the previous one ended.
 * @fn_arg: The thread function argument, passed through unchanged.
 * @start: The start of the job (units are job-specific).
 * @size: The amount of work to do beginning at @start (units are
 *        job-specific).  A size of zero makes the job a no-op.
 * @align: Ranges passed to the thread function fall on this boundary, with the
 *         possible exceptions of the beginning and end of the job.
 * @min_chunk: The minimum chunk size in job-specific units.  This allows
 *             the client to communicate the minimum amount of work that's
 *             appropriate for one worker thread to do at once.  It also
 *             bounds the number of threads used (@size / @min_chunk).
 * @max_threads: Max threads to use for the job, actual number may be less
 *               depending on task size and minimum chunk size.
 */
struct padata_mt_job {
void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
void *fn_arg;
unsigned long start;
unsigned long size;
unsigned long align;
unsigned long min_chunk;
int max_threads;
};

/**
* struct padata_instance - The overall control structure.
*
Expand Down Expand Up @@ -173,6 +201,7 @@ extern void padata_free_shell(struct padata_shell *ps);
extern int padata_do_parallel(struct padata_shell *ps,
struct padata_priv *padata, int *cb_cpu);
extern void padata_do_serial(struct padata_priv *padata);
extern void __init padata_do_multithreaded(struct padata_mt_job *job);
extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
cpumask_var_t cpumask);
extern int padata_start(struct padata_instance *pinst);
Expand Down
152 changes: 149 additions & 3 deletions kernel/padata.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
* Copyright (C) 2008, 2009 secunet Security Networks AG
* Copyright (C) 2008, 2009 Steffen Klassert <[email protected]>
*
* Copyright (c) 2020 Oracle and/or its affiliates.
* Author: Daniel Jordan <[email protected]>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
Expand All @@ -21,6 +24,7 @@
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include <linux/completion.h>
#include <linux/export.h>
#include <linux/cpumask.h>
#include <linux/err.h>
Expand All @@ -32,6 +36,8 @@
#include <linux/sysfs.h>
#include <linux/rcupdate.h>

#define PADATA_WORK_ONSTACK 1 /* Work's memory is on stack */

struct padata_work {
struct work_struct pw_work;
struct list_head pw_list; /* padata_free_works linkage */
Expand All @@ -42,7 +48,17 @@ static DEFINE_SPINLOCK(padata_works_lock);
static struct padata_work *padata_works;
static LIST_HEAD(padata_free_works);

/*
 * Shared bookkeeping for one multithreaded job.  One instance lives on the
 * stack of padata_do_multithreaded() and is reached by every helper through
 * padata_work::pw_data.
 */
struct padata_mt_job_state {
/* Protects job->start, job->size and nworks_fini while chunks are claimed. */
spinlock_t lock;
/* Completed by the helper that brings nworks_fini up to nworks. */
struct completion completion;
/* The client's job description; start/size shrink as work is handed out. */
struct padata_mt_job *job;
/* Number of helpers taking part, including the submitting task itself. */
int nworks;
/* Number of helpers that have run out of work and finished. */
int nworks_fini;
/* Upper bound on the amount of work passed to one thread_fn call. */
unsigned long chunk_size;
};

static void padata_free_pd(struct parallel_data *pd);
static void __init padata_mt_helper(struct work_struct *work);

static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
Expand Down Expand Up @@ -81,18 +97,56 @@ static struct padata_work *padata_work_alloc(void)
}

/*
 * Prepare @pw to run @work_fn with @data as its payload.
 *
 * @pw:      The work item to initialize.
 * @work_fn: Workqueue callback to run.
 * @data:    Opaque pointer stored in pw->pw_data for the callback to fetch.
 * @flags:   PADATA_WORK_ONSTACK if @pw lives on the caller's stack, in which
 *           case the on-stack initializer is used; otherwise 0.
 */
static void padata_work_init(struct padata_work *pw, work_func_t work_fn,
			     void *data, int flags)
{
	if (flags & PADATA_WORK_ONSTACK)
		INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
	else
		INIT_WORK(&pw->pw_work, work_fn);
	pw->pw_data = data;
}

/*
 * Reserve work items for the extra helpers of a multithreaded job.
 *
 * @nworks: Total number of helpers wanted, counting the calling task.
 * @data:   Payload stored in every allocated work item.
 * @head:   List that receives the allocated work items.
 *
 * Returns the number of helpers that will actually take part, again counting
 * the calling task, which may be lower than @nworks if the pool runs dry.
 */
static int __init padata_work_alloc_mt(int nworks, void *data,
				       struct list_head *head)
{
	/* The calling task is itself a helper, so counting begins at one. */
	int nhelpers = 1;

	spin_lock(&padata_works_lock);
	while (nhelpers < nworks) {
		struct padata_work *work = padata_work_alloc();

		/* Pool exhausted: make do with what has been reserved so far. */
		if (!work)
			break;

		padata_work_init(work, padata_mt_helper, data, 0);
		list_add(&work->pw_list, head);
		++nhelpers;
	}
	spin_unlock(&padata_works_lock);

	return nhelpers;
}

/*
 * Hand @pw back to the pool of free work items.
 *
 * The caller must already hold padata_works_lock; lockdep verifies this.
 */
static void padata_work_free(struct padata_work *pw)
{
	struct list_head *entry = &pw->pw_list;

	lockdep_assert_held(&padata_works_lock);
	list_add(entry, &padata_free_works);
}

/*
 * Return every work item on @works to the free pool, leaving @works empty.
 *
 * An empty list is handled without touching padata_works_lock.
 */
static void __init padata_works_free(struct list_head *works)
{
	if (list_empty(works))
		return;

	spin_lock(&padata_works_lock);
	/* Peel items off the front until nothing is left. */
	while (!list_empty(works)) {
		struct padata_work *item;

		item = list_first_entry(works, struct padata_work, pw_list);
		list_del(&item->pw_list);
		padata_work_free(item);
	}
	spin_unlock(&padata_works_lock);
}

static void padata_parallel_worker(struct work_struct *parallel_work)
{
struct padata_work *pw = container_of(parallel_work, struct padata_work,
Expand Down Expand Up @@ -168,7 +222,7 @@ int padata_do_parallel(struct padata_shell *ps,
pw = padata_work_alloc();
spin_unlock(&padata_works_lock);
if (pw) {
padata_work_init(pw, padata_parallel_worker, padata);
padata_work_init(pw, padata_parallel_worker, padata, 0);
queue_work(pinst->parallel_wq, &pw->pw_work);
} else {
/* Maximum works limit exceeded, run in the current task. */
Expand Down Expand Up @@ -409,6 +463,98 @@ static int pd_setup_cpumasks(struct parallel_data *pd,
return err;
}

/*
 * Body of one helper in a multithreaded job.
 *
 * Repeatedly claims the next chunk of the job under the state lock, drops the
 * lock to run the client's thread function on it, and stops once no work is
 * left.  The helper that finishes last signals the job's completion.
 */
static void __init padata_mt_helper(struct work_struct *w)
{
	struct padata_work *work = container_of(w, struct padata_work, pw_work);
	struct padata_mt_job_state *state = work->pw_data;
	struct padata_mt_job *job = state->job;
	bool last_helper;

	spin_lock(&state->lock);

	for (;;) {
		unsigned long first, len, limit;

		if (job->size == 0)
			break;

		first = job->start;
		/* Stop at the next chunk_size boundary when enough work remains. */
		len = roundup(first + 1, state->chunk_size) - first;
		if (len > job->size)
			len = job->size;
		limit = first + len;

		/* Claim [first, limit) before letting go of the lock. */
		job->start = limit;
		job->size -= len;

		spin_unlock(&state->lock);
		job->thread_fn(first, limit, job->fn_arg);
		spin_lock(&state->lock);
	}

	last_helper = (++state->nworks_fini == state->nworks);
	spin_unlock(&state->lock);

	if (last_helper)
		complete(&state->completion);
}

/**
 * padata_do_multithreaded - run a multithreaded job
 * @job: Description of the job.
 *
 * Splits the job into chunks and runs the client's thread function on them
 * from the current task plus helpers queued on system_unbound_wq, returning
 * only after every chunk has been processed.  A job of size zero is a no-op,
 * and a job that warrants a single thread is run directly in the caller.
 *
 * When more than one thread is used, @job->start and @job->size are consumed
 * as chunks are handed out.  A @job->min_chunk or @job->align of zero is
 * treated as one, and a non-positive @job->max_threads as one.
 *
 * See the definition of struct padata_mt_job for more details.
 */
void __init padata_do_multithreaded(struct padata_mt_job *job)
{
	/* In case threads finish at different times. */
	static const unsigned long load_balance_factor = 4;
	struct padata_work my_work, *pw;
	struct padata_mt_job_state ps;
	unsigned long min_chunk, align, max_works;
	LIST_HEAD(works);
	int nworks;

	if (job->size == 0)
		return;

	/* Both values are used as divisors below, so never let them be zero. */
	min_chunk = max(job->min_chunk, 1ul);
	align = max(job->align, 1ul);

	/*
	 * Ensure at least one thread when size < min_chunk.  The chunk size is
	 * rounded up to the alignment further down, so the larger of the two
	 * bounds how many threads can actually receive work.
	 */
	max_works = max(job->size / max(min_chunk, align), 1ul);

	/*
	 * Clamp in unsigned long before narrowing to int: a huge size with a
	 * small min_chunk must not truncate into a zero or negative count.
	 */
	nworks = max(job->max_threads, 1);
	if (max_works < (unsigned long)nworks)
		nworks = max_works;

	if (nworks == 1) {
		/* Single thread, no coordination needed, cut to the chase. */
		job->thread_fn(job->start, job->start + job->size, job->fn_arg);
		return;
	}

	spin_lock_init(&ps.lock);
	init_completion(&ps.completion);
	ps.job = job;
	/* May come back smaller than requested if work items are scarce. */
	ps.nworks = padata_work_alloc_mt(nworks, &ps, &works);
	ps.nworks_fini = 0;

	/*
	 * Chunk size is the amount of work a helper does per call to the
	 * thread function.  Load balance large jobs between threads by
	 * increasing the number of chunks, guarantee at least the minimum
	 * chunk size from the caller, and honor the caller's alignment.
	 */
	ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
	ps.chunk_size = max(ps.chunk_size, min_chunk);
	ps.chunk_size = roundup(ps.chunk_size, align);

	list_for_each_entry(pw, &works, pw_list)
		queue_work(system_unbound_wq, &pw->pw_work);

	/* Use the current thread, which saves starting a workqueue worker. */
	padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
	padata_mt_helper(&my_work.pw_work);

	/* Wait for all the helpers to finish. */
	wait_for_completion(&ps.completion);

	destroy_work_on_stack(&my_work.pw_work);
	padata_works_free(&works);
}

static void __padata_list_init(struct padata_list *pd_list)
{
INIT_LIST_HEAD(&pd_list->list);
Expand Down

0 comments on commit 004ed42

Please sign in to comment.