Commit 6fc4dbc

padata: Replace delayed timer with immediate workqueue in padata_reorder
The function padata_reorder will use a timer when it cannot progress while completed jobs are outstanding (pd->reorder_objects > 0). This is suboptimal: if we do end up using the timer, it introduces a gratuitous delay of one second.

In fact we can easily distinguish between whether completed jobs are outstanding and whether we can make progress: all we have to do is look at the next pqueue list.

This patch does that by replacing pd->processed with pd->cpu, so that the next pqueue is more accessible.

A work queue is used instead of the original try_again to avoid hogging the CPU.

Note that we don't bother removing the work queue in padata_flush_queues because the whole premise is broken. You cannot flush async crypto requests, so it makes no sense to even try. A subsequent patch will fix this by replacing it with a reference-counting scheme.

Signed-off-by: Herbert Xu <[email protected]>
1 parent 97ac82d commit 6fc4dbc

2 files changed: +22 −88 lines changed
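To make the bookkeeping change easier to follow, here is a small, self-contained userspace sketch (not kernel code; the helper next_cpu_wrap(), the bitmask and the loop are invented for illustration) of how the reorder position is intended to advance after this patch: instead of deriving the next per-cpu queue from a pd->processed counter, pd->cpu walks round-robin through the parallel cpumask and wraps back to the first CPU, which is what cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false) does in the diff below.

#include <stdio.h>

/* Toy stand-in for cpumask_next_wrap(cpu, mask, -1, false): return the
 * next set bit after 'cpu', wrapping around to the lowest set bit. */
static int next_cpu_wrap(int cpu, unsigned int mask, int nbits)
{
        for (int i = 1; i <= nbits; i++) {
                int candidate = (cpu + i) % nbits;

                if (mask & (1u << candidate))
                        return candidate;
        }
        return -1;      /* empty mask */
}

int main(void)
{
        unsigned int pcpu_mask = 0xd;   /* CPUs 0, 2 and 3 in the parallel mask */
        int cpu = 0;                    /* like pd->cpu = cpumask_first(pcpumask) */

        /* Each object dequeued in order moves the reorder position on. */
        for (int obj = 0; obj < 6; obj++) {
                printf("object %d is expected on the pqueue of cpu %d\n", obj, cpu);
                cpu = next_cpu_wrap(cpu, pcpu_mask, 4);
        }
        return 0;
}

Running it prints the round-robin sequence 0, 2, 3, 0, 2, 3, i.e. the order in which the per-cpu reorder queues are visited.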


include/linux/padata.h

Lines changed: 4 additions & 9 deletions
@@ -12,7 +12,6 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/timer.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>

@@ -73,18 +72,14 @@ struct padata_serial_queue {
  * @serial: List to wait for serialization after reordering.
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
- * @pd: Backpointer to the internal control structure.
  * @work: work struct for parallelization.
- * @reorder_work: work struct for reordering.
  * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
 struct padata_parallel_queue {
        struct padata_list    parallel;
        struct padata_list    reorder;
-       struct parallel_data *pd;
        struct work_struct    work;
-       struct work_struct    reorder_work;
        atomic_t              num_obj;
        int                   cpu_index;
 };
@@ -110,10 +105,10 @@ struct padata_cpumask {
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr: Maximal used sequence number.
+ * @cpu: Next CPU to be processed.
  * @cpumask: The cpumasks in use for parallel and serial workers.
+ * @reorder_work: work struct for reordering.
  * @lock: Reorder lock.
- * @processed: Number of already processed objects.
- * @timer: Reorder timer.
  */
 struct parallel_data {
        struct padata_instance *pinst;
@@ -122,10 +117,10 @@ struct parallel_data {
        atomic_t               reorder_objects;
        atomic_t               refcnt;
        atomic_t               seq_nr;
+       int                    cpu;
        struct padata_cpumask  cpumask;
+       struct work_struct     reorder_work;
        spinlock_t             lock ____cacheline_aligned;
-       unsigned int           processed;
-       struct timer_list      timer;
 };

 /**

kernel/padata.c

Lines changed: 18 additions & 79 deletions
@@ -165,23 +165,12 @@ EXPORT_SYMBOL(padata_do_parallel);
  */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
-       int cpu, num_cpus;
-       unsigned int next_nr, next_index;
        struct padata_parallel_queue *next_queue;
        struct padata_priv *padata;
        struct padata_list *reorder;
+       int cpu = pd->cpu;

-       num_cpus = cpumask_weight(pd->cpumask.pcpu);
-
-       /*
-        * Calculate the percpu reorder queue and the sequence
-        * number of the next object.
-        */
-       next_nr = pd->processed;
-       next_index = next_nr % num_cpus;
-       cpu = padata_index_to_cpu(pd, next_index);
        next_queue = per_cpu_ptr(pd->pqueue, cpu);
-
        reorder = &next_queue->reorder;

        spin_lock(&reorder->lock);
@@ -192,7 +181,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
                list_del_init(&padata->list);
                atomic_dec(&pd->reorder_objects);

-               pd->processed++;
+               pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
+                                           false);

                spin_unlock(&reorder->lock);
                goto out;
@@ -215,6 +205,7 @@ static void padata_reorder(struct parallel_data *pd)
        struct padata_priv *padata;
        struct padata_serial_queue *squeue;
        struct padata_instance *pinst = pd->pinst;
+       struct padata_parallel_queue *next_queue;

        /*
         * We need to ensure that only one cpu can work on dequeueing of
@@ -246,7 +237,6 @@ static void padata_reorder(struct parallel_data *pd)
         * so exit immediately.
         */
        if (PTR_ERR(padata) == -ENODATA) {
-               del_timer(&pd->timer);
                spin_unlock_bh(&pd->lock);
                return;
        }
@@ -265,70 +255,29 @@ static void padata_reorder(struct parallel_data *pd)

        /*
         * The next object that needs serialization might have arrived to
-        * the reorder queues in the meantime, we will be called again
-        * from the timer function if no one else cares for it.
+        * the reorder queues in the meantime.
         *
-        * Ensure reorder_objects is read after pd->lock is dropped so we see
-        * an increment from another task in padata_do_serial. Pairs with
+        * Ensure reorder queue is read after pd->lock is dropped so we see
+        * new objects from another task in padata_do_serial. Pairs with
         * smp_mb__after_atomic in padata_do_serial.
         */
        smp_mb();
-       if (atomic_read(&pd->reorder_objects)
-                       && !(pinst->flags & PADATA_RESET))
-               mod_timer(&pd->timer, jiffies + HZ);
-       else
-               del_timer(&pd->timer);

-       return;
+       next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
+       if (!list_empty(&next_queue->reorder.list))
+               queue_work(pinst->wq, &pd->reorder_work);
 }

 static void invoke_padata_reorder(struct work_struct *work)
 {
-       struct padata_parallel_queue *pqueue;
        struct parallel_data *pd;

        local_bh_disable();
-       pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
-       pd = pqueue->pd;
+       pd = container_of(work, struct parallel_data, reorder_work);
        padata_reorder(pd);
        local_bh_enable();
 }

-static void padata_reorder_timer(struct timer_list *t)
-{
-       struct parallel_data *pd = from_timer(pd, t, timer);
-       unsigned int weight;
-       int target_cpu, cpu;
-
-       cpu = get_cpu();
-
-       /* We don't lock pd here to not interfere with parallel processing
-        * padata_reorder() calls on other CPUs. We just need any CPU out of
-        * the cpumask.pcpu set. It would be nice if it's the right one but
-        * it doesn't matter if we're off to the next one by using an outdated
-        * pd->processed value.
-        */
-       weight = cpumask_weight(pd->cpumask.pcpu);
-       target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
-
-       /* ensure to call the reorder callback on the correct CPU */
-       if (cpu != target_cpu) {
-               struct padata_parallel_queue *pqueue;
-               struct padata_instance *pinst;
-
-               /* The timer function is serialized wrt itself -- no locking
-                * needed.
-                */
-               pinst = pd->pinst;
-               pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
-               queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
-       } else {
-               padata_reorder(pd);
-       }
-
-       put_cpu();
-}
-
 static void padata_serial_worker(struct work_struct *serial_work)
 {
        struct padata_serial_queue *squeue;
@@ -376,9 +325,8 @@ void padata_do_serial(struct padata_priv *padata)

        cpu = get_cpu();

-       /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
-        * was called on -- or, at least, enqueue the padata object into the
-        * correct per-cpu queue.
+       /* We need to enqueue the padata object into the correct
+        * per-cpu queue.
         */
        if (cpu != padata->cpu) {
                reorder_via_wq = 1;
@@ -388,26 +336,20 @@ void padata_do_serial(struct padata_priv *padata)
        pqueue = per_cpu_ptr(pd->pqueue, cpu);

        spin_lock(&pqueue->reorder.lock);
-       atomic_inc(&pd->reorder_objects);
        list_add_tail(&padata->list, &pqueue->reorder.list);
+       atomic_inc(&pd->reorder_objects);
        spin_unlock(&pqueue->reorder.lock);

        /*
-        * Ensure the atomic_inc of reorder_objects above is ordered correctly
+        * Ensure the addition to the reorder list is ordered correctly
         * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
         * in padata_reorder.
         */
        smp_mb__after_atomic();

        put_cpu();

-       /* If we're running on the wrong CPU, call padata_reorder() via a
-        * kernel worker.
-        */
-       if (reorder_via_wq)
-               queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
-       else
-               padata_reorder(pd);
+       padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);

@@ -463,14 +405,12 @@ static void padata_init_pqueues(struct parallel_data *pd)
                        continue;
                }

-               pqueue->pd = pd;
                pqueue->cpu_index = cpu_index;
                cpu_index++;

                __padata_list_init(&pqueue->reorder);
                __padata_list_init(&pqueue->parallel);
                INIT_WORK(&pqueue->work, padata_parallel_worker);
-               INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
                atomic_set(&pqueue->num_obj, 0);
        }
 }
@@ -498,12 +438,13 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,

        padata_init_pqueues(pd);
        padata_init_squeues(pd);
-       timer_setup(&pd->timer, padata_reorder_timer, 0);
        atomic_set(&pd->seq_nr, -1);
        atomic_set(&pd->reorder_objects, 0);
        atomic_set(&pd->refcnt, 0);
        pd->pinst = pinst;
        spin_lock_init(&pd->lock);
+       pd->cpu = cpumask_first(pcpumask);
+       INIT_WORK(&pd->reorder_work, invoke_padata_reorder);

        return pd;

@@ -538,8 +479,6 @@ static void padata_flush_queues(struct parallel_data *pd)
                flush_work(&pqueue->work);
        }

-       del_timer_sync(&pd->timer);
-
        if (atomic_read(&pd->reorder_objects))
                padata_reorder(pd);

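A closing note on the control flow that replaces the timer: after draining everything it can, padata_reorder drops pd->lock and then peeks at the reorder queue of pd->cpu; if another task slipped an object in while the lock was still held (that task may itself have failed the trylock and gone away), the work item re-queues itself instead of waiting up to a second for a timer. The minimal single-threaded sketch below illustrates only that pattern; try_lock(), drain_one(), next_queue_has_work() and requeue_self() are hypothetical stand-ins, not the padata API.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the real padata machinery. */
static bool try_lock(void)            { return true; }   /* spin_trylock(&pd->lock) */
static void unlock(void)              { }                /* spin_unlock_bh(&pd->lock) */
static bool drain_one(void)           { static int left = 3; return left-- > 0; }
static bool next_queue_has_work(void) { return false; }  /* !list_empty(&next_queue->reorder.list) */
static void requeue_self(void)        { puts("queue_work(pinst->wq, &pd->reorder_work)"); }

static void reorder(void)
{
        if (!try_lock())        /* another CPU is already reordering for us */
                return;

        while (drain_one())     /* serialize everything that is in order */
                ;

        unlock();

        /* An object may have arrived while the lock was still held and its
         * producer may have failed try_lock() above, so kick the work item
         * again rather than re-arming a one-second timer. */
        if (next_queue_has_work())
                requeue_self();
}

int main(void)
{
        reorder();
        return 0;
}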