Skip to content

Commit 2d5f076

Browse files
Lai Jiangshan authored and Tejun Heo (htejun) committed
workqueue: split apply_workqueue_attrs() into 3 stages
workqueue: split apply_workqueue_attrs() into 3 stages

The current apply_workqueue_attrs() includes both pwq allocation and pwq installation, so when we batch multiple apply_workqueue_attrs() calls as a transaction, we can't ensure the transaction succeeds or fails as a complete unit. To solve this, we split apply_workqueue_attrs() into three stages. The first stage does the preparation: allocating memory and pwqs. The second stage does the attrs installation and pwq installation. The third stage frees the allocated memory and the (old or unused) pwqs. As a result, batching multiple apply_workqueue_attrs() calls can succeed or fail as a complete unit: 1) do the first stage for all the workqueues, 2) commit them all only when all of the above succeed. This patch is a preparation for the next patch ("Allow modifying low level unbound workqueue cpumask") which will do multiple apply_workqueue_attrs() calls. The patch has no functional change except two minor adjustments: 1) free_unbound_pwq() for the error path is removed; we use the heavier put_pwq_unlocked() instead, since the error path is rare. This adjustment simplifies the code. 2) The memory allocation is also moved inside wq_pool_mutex. This is needed to avoid further splitting. tj: minor updates to comments. Suggested-by: Tejun Heo <[email protected]> Cc: Christoph Lameter <[email protected]> Cc: Kevin Hilman <[email protected]> Cc: Lai Jiangshan <[email protected]> Cc: Mike Galbraith <[email protected]> Cc: Paul E. McKenney <[email protected]> Cc: Tejun Heo <[email protected]> Cc: Viresh Kumar <[email protected]> Cc: Frederic Weisbecker <[email protected]> Signed-off-by: Lai Jiangshan <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent b787f68 commit 2d5f076

File tree

1 file changed

+115
-84
lines changed

1 file changed

+115
-84
lines changed

kernel/workqueue.c

Lines changed: 115 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -3425,17 +3425,6 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
34253425
return pwq;
34263426
}
34273427

3428-
/* undo alloc_unbound_pwq(), used only in the error path */
3429-
static void free_unbound_pwq(struct pool_workqueue *pwq)
3430-
{
3431-
lockdep_assert_held(&wq_pool_mutex);
3432-
3433-
if (pwq) {
3434-
put_unbound_pool(pwq->pool);
3435-
kmem_cache_free(pwq_cache, pwq);
3436-
}
3437-
}
3438-
34393428
/**
34403429
* wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node
34413430
* @attrs: the wq_attrs of interest
@@ -3498,42 +3487,48 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
34983487
return old_pwq;
34993488
}
35003489

3501-
/**
3502-
* apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
3503-
* @wq: the target workqueue
3504-
* @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
3505-
*
3506-
* Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
3507-
* machines, this function maps a separate pwq to each NUMA node with
3508-
* possibles CPUs in @attrs->cpumask so that work items are affine to the
3509-
* NUMA node it was issued on. Older pwqs are released as in-flight work
3510-
* items finish. Note that a work item which repeatedly requeues itself
3511-
* back-to-back will stay on its current pwq.
3512-
*
3513-
* Performs GFP_KERNEL allocations.
3514-
*
3515-
* Return: 0 on success and -errno on failure.
3516-
*/
3517-
int apply_workqueue_attrs(struct workqueue_struct *wq,
3518-
const struct workqueue_attrs *attrs)
3490+
/* context to store the prepared attrs & pwqs before applying */
3491+
struct apply_wqattrs_ctx {
3492+
struct workqueue_struct *wq; /* target workqueue */
3493+
struct workqueue_attrs *attrs; /* attrs to apply */
3494+
struct pool_workqueue *dfl_pwq;
3495+
struct pool_workqueue *pwq_tbl[];
3496+
};
3497+
3498+
/* free the resources after success or abort */
3499+
static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3500+
{
3501+
if (ctx) {
3502+
int node;
3503+
3504+
for_each_node(node)
3505+
put_pwq_unlocked(ctx->pwq_tbl[node]);
3506+
put_pwq_unlocked(ctx->dfl_pwq);
3507+
3508+
free_workqueue_attrs(ctx->attrs);
3509+
3510+
kfree(ctx);
3511+
}
3512+
}
3513+
3514+
/* allocate the attrs and pwqs for later installation */
3515+
static struct apply_wqattrs_ctx *
3516+
apply_wqattrs_prepare(struct workqueue_struct *wq,
3517+
const struct workqueue_attrs *attrs)
35193518
{
3519+
struct apply_wqattrs_ctx *ctx;
35203520
struct workqueue_attrs *new_attrs, *tmp_attrs;
3521-
struct pool_workqueue **pwq_tbl, *dfl_pwq;
3522-
int node, ret;
3521+
int node;
35233522

3524-
/* only unbound workqueues can change attributes */
3525-
if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3526-
return -EINVAL;
3523+
lockdep_assert_held(&wq_pool_mutex);
35273524

3528-
/* creating multiple pwqs breaks ordering guarantee */
3529-
if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
3530-
return -EINVAL;
3525+
ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
3526+
GFP_KERNEL);
35313527

3532-
pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL);
35333528
new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
35343529
tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3535-
if (!pwq_tbl || !new_attrs || !tmp_attrs)
3536-
goto enomem;
3530+
if (!ctx || !new_attrs || !tmp_attrs)
3531+
goto out_free;
35373532

35383533
/* make a copy of @attrs and sanitize it */
35393534
copy_workqueue_attrs(new_attrs, attrs);
@@ -3546,76 +3541,112 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
35463541
*/
35473542
copy_workqueue_attrs(tmp_attrs, new_attrs);
35483543

3549-
/*
3550-
* CPUs should stay stable across pwq creations and installations.
3551-
* Pin CPUs, determine the target cpumask for each node and create
3552-
* pwqs accordingly.
3553-
*/
3554-
get_online_cpus();
3555-
3556-
mutex_lock(&wq_pool_mutex);
3557-
35583544
/*
35593545
* If something goes wrong during CPU up/down, we'll fall back to
35603546
* the default pwq covering whole @attrs->cpumask. Always create
35613547
* it even if we don't use it immediately.
35623548
*/
3563-
dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3564-
if (!dfl_pwq)
3565-
goto enomem_pwq;
3549+
ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3550+
if (!ctx->dfl_pwq)
3551+
goto out_free;
35663552

35673553
for_each_node(node) {
35683554
if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
3569-
pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3570-
if (!pwq_tbl[node])
3571-
goto enomem_pwq;
3555+
ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3556+
if (!ctx->pwq_tbl[node])
3557+
goto out_free;
35723558
} else {
3573-
dfl_pwq->refcnt++;
3574-
pwq_tbl[node] = dfl_pwq;
3559+
ctx->dfl_pwq->refcnt++;
3560+
ctx->pwq_tbl[node] = ctx->dfl_pwq;
35753561
}
35763562
}
35773563

3578-
mutex_unlock(&wq_pool_mutex);
3564+
ctx->attrs = new_attrs;
3565+
ctx->wq = wq;
3566+
free_workqueue_attrs(tmp_attrs);
3567+
return ctx;
3568+
3569+
out_free:
3570+
free_workqueue_attrs(tmp_attrs);
3571+
free_workqueue_attrs(new_attrs);
3572+
apply_wqattrs_cleanup(ctx);
3573+
return NULL;
3574+
}
3575+
3576+
/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
3577+
static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3578+
{
3579+
int node;
35793580

35803581
/* all pwqs have been created successfully, let's install'em */
3581-
mutex_lock(&wq->mutex);
3582+
mutex_lock(&ctx->wq->mutex);
35823583

3583-
copy_workqueue_attrs(wq->unbound_attrs, new_attrs);
3584+
copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
35843585

35853586
/* save the previous pwq and install the new one */
35863587
for_each_node(node)
3587-
pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]);
3588+
ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
3589+
ctx->pwq_tbl[node]);
35883590

35893591
/* @dfl_pwq might not have been used, ensure it's linked */
3590-
link_pwq(dfl_pwq);
3591-
swap(wq->dfl_pwq, dfl_pwq);
3592+
link_pwq(ctx->dfl_pwq);
3593+
swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
35923594

3593-
mutex_unlock(&wq->mutex);
3595+
mutex_unlock(&ctx->wq->mutex);
3596+
}
35943597

3595-
/* put the old pwqs */
3596-
for_each_node(node)
3597-
put_pwq_unlocked(pwq_tbl[node]);
3598-
put_pwq_unlocked(dfl_pwq);
3598+
/**
3599+
* apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
3600+
* @wq: the target workqueue
3601+
* @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
3602+
*
3603+
* Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
3604+
* machines, this function maps a separate pwq to each NUMA node with
3605+
* possibles CPUs in @attrs->cpumask so that work items are affine to the
3606+
* NUMA node it was issued on. Older pwqs are released as in-flight work
3607+
* items finish. Note that a work item which repeatedly requeues itself
3608+
* back-to-back will stay on its current pwq.
3609+
*
3610+
* Performs GFP_KERNEL allocations.
3611+
*
3612+
* Return: 0 on success and -errno on failure.
3613+
*/
3614+
int apply_workqueue_attrs(struct workqueue_struct *wq,
3615+
const struct workqueue_attrs *attrs)
3616+
{
3617+
struct apply_wqattrs_ctx *ctx;
3618+
int ret = -ENOMEM;
35993619

3600-
put_online_cpus();
3601-
ret = 0;
3602-
/* fall through */
3603-
out_free:
3604-
free_workqueue_attrs(tmp_attrs);
3605-
free_workqueue_attrs(new_attrs);
3606-
kfree(pwq_tbl);
3607-
return ret;
3620+
/* only unbound workqueues can change attributes */
3621+
if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3622+
return -EINVAL;
36083623

3609-
enomem_pwq:
3610-
free_unbound_pwq(dfl_pwq);
3611-
for_each_node(node)
3612-
if (pwq_tbl && pwq_tbl[node] != dfl_pwq)
3613-
free_unbound_pwq(pwq_tbl[node]);
3624+
/* creating multiple pwqs breaks ordering guarantee */
3625+
if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
3626+
return -EINVAL;
3627+
3628+
/*
3629+
* CPUs should stay stable across pwq creations and installations.
3630+
* Pin CPUs, determine the target cpumask for each node and create
3631+
* pwqs accordingly.
3632+
*/
3633+
get_online_cpus();
3634+
3635+
mutex_lock(&wq_pool_mutex);
3636+
ctx = apply_wqattrs_prepare(wq, attrs);
36143637
mutex_unlock(&wq_pool_mutex);
3638+
3639+
/* the ctx has been prepared successfully, let's commit it */
3640+
if (ctx) {
3641+
apply_wqattrs_commit(ctx);
3642+
ret = 0;
3643+
}
3644+
36153645
put_online_cpus();
3616-
enomem:
3617-
ret = -ENOMEM;
3618-
goto out_free;
3646+
3647+
apply_wqattrs_cleanup(ctx);
3648+
3649+
return ret;
36193650
}
36203651

36213652
/**

0 commit comments

Comments
 (0)