@@ -165,23 +165,12 @@ EXPORT_SYMBOL(padata_do_parallel);
  */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
-	int cpu, num_cpus;
-	unsigned int next_nr, next_index;
 	struct padata_parallel_queue *next_queue;
 	struct padata_priv *padata;
 	struct padata_list *reorder;
+	int cpu = pd->cpu;
 
-	num_cpus = cpumask_weight(pd->cpumask.pcpu);
-
-	/*
-	 * Calculate the percpu reorder queue and the sequence
-	 * number of the next object.
-	 */
-	next_nr = pd->processed;
-	next_index = next_nr % num_cpus;
-	cpu = padata_index_to_cpu(pd, next_index);
 	next_queue = per_cpu_ptr(pd->pqueue, cpu);
-
 	reorder = &next_queue->reorder;
 
 	spin_lock(&reorder->lock);
@@ -192,7 +181,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 		list_del_init(&padata->list);
 		atomic_dec(&pd->reorder_objects);
 
-		pd->processed++;
+		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
+					    false);
 
 		spin_unlock(&reorder->lock);
 		goto out;
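
The hunks above replace the pd->processed modulo arithmetic with a pd->cpu field that is advanced round-robin after every successful dequeue. As a minimal userspace sketch of the wrap-around search that cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false) performs here — a plain bool array stands in for struct cpumask, and all names are illustrative:

#include <stdio.h>
#include <stdbool.h>

/* Find the next set entry after `cpu`, wrapping past the end of the
 * mask; may return `cpu` itself if it is the only CPU in the mask.
 * Returns -1 for an empty mask.
 */
static int next_cpu_wrap(int cpu, const bool *mask, int ncpus)
{
	for (int i = 1; i <= ncpus; i++) {
		int candidate = (cpu + i) % ncpus;
		if (mask[candidate])
			return candidate;
	}
	return -1;
}

int main(void)
{
	bool mask[4] = { true, false, true, true };	/* CPUs 0, 2, 3 */
	int cpu = 0;

	for (int i = 0; i < 5; i++) {
		cpu = next_cpu_wrap(cpu, mask, 4);
		printf("%d ", cpu);			/* prints: 2 3 0 2 3 */
	}
	printf("\n");
	return 0;
}
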
@@ -215,6 +205,7 @@ static void padata_reorder(struct parallel_data *pd)
 	struct padata_priv *padata;
 	struct padata_serial_queue *squeue;
 	struct padata_instance *pinst = pd->pinst;
+	struct padata_parallel_queue *next_queue;
 
 	/*
 	 * We need to ensure that only one cpu can work on dequeueing of
@@ -246,7 +237,6 @@ static void padata_reorder(struct parallel_data *pd)
 		 * so exit immediately.
 		 */
 		if (PTR_ERR(padata) == -ENODATA) {
-			del_timer(&pd->timer);
 			spin_unlock_bh(&pd->lock);
 			return;
 		}
@@ -265,70 +255,29 @@ static void padata_reorder(struct parallel_data *pd)
 
 	/*
 	 * The next object that needs serialization might have arrived to
-	 * the reorder queues in the meantime, we will be called again
-	 * from the timer function if no one else cares for it.
+	 * the reorder queues in the meantime.
 	 *
-	 * Ensure reorder_objects is read after pd->lock is dropped so we see
-	 * an increment from another task in padata_do_serial. Pairs with
+	 * Ensure reorder queue is read after pd->lock is dropped so we see
+	 * new objects from another task in padata_do_serial. Pairs with
 	 * smp_mb__after_atomic in padata_do_serial.
 	 */
 	smp_mb();
-	if (atomic_read(&pd->reorder_objects)
-			&& !(pinst->flags & PADATA_RESET))
-		mod_timer(&pd->timer, jiffies + HZ);
-	else
-		del_timer(&pd->timer);
 
-	return;
+	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
+	if (!list_empty(&next_queue->reorder.list))
+		queue_work(pinst->wq, &pd->reorder_work);
 }
 
 static void invoke_padata_reorder(struct work_struct *work)
 {
-	struct padata_parallel_queue *pqueue;
 	struct parallel_data *pd;
 
 	local_bh_disable();
-	pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
-	pd = pqueue->pd;
+	pd = container_of(work, struct parallel_data, reorder_work);
 	padata_reorder(pd);
 	local_bh_enable();
 }
 
-static void padata_reorder_timer(struct timer_list *t)
-{
-	struct parallel_data *pd = from_timer(pd, t, timer);
-	unsigned int weight;
-	int target_cpu, cpu;
-
-	cpu = get_cpu();
-
-	/* We don't lock pd here to not interfere with parallel processing
-	 * padata_reorder() calls on other CPUs. We just need any CPU out of
-	 * the cpumask.pcpu set. It would be nice if it's the right one but
-	 * it doesn't matter if we're off to the next one by using an outdated
-	 * pd->processed value.
-	 */
-	weight = cpumask_weight(pd->cpumask.pcpu);
-	target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
-
-	/* ensure to call the reorder callback on the correct CPU */
-	if (cpu != target_cpu) {
-		struct padata_parallel_queue *pqueue;
-		struct padata_instance *pinst;
-
-		/* The timer function is serialized wrt itself -- no locking
-		 * needed.
-		 */
-		pinst = pd->pinst;
-		pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
-		queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
-	} else {
-		padata_reorder(pd);
-	}
-
-	put_cpu();
-}
-
 static void padata_serial_worker(struct work_struct *serial_work)
 {
 	struct padata_serial_queue *squeue;
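
The timer is replaced by an explicit handoff: after dropping pd->lock, padata_reorder re-checks the next reorder queue and, if a new object slipped in, kicks pd->reorder_work instead of waiting up to a jiffy. The barrier pairing described in the comments can be sketched with C11 atomics; this is an illustrative analogue of the race, not kernel code (queued stands in for a non-empty reorder list, locked for pd->lock):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool queued;	/* analogue of a non-empty reorder list */
static atomic_bool locked;	/* analogue of pd->lock */

/* Producer side, as in padata_do_serial(): publish the object, issue a
 * full barrier, then try to take over reordering.
 */
static void serial_side(void)
{
	atomic_store(&queued, true);			/* list_add_tail()        */
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb__after_atomic() */
	if (!atomic_exchange(&locked, true)) {		/* spin_trylock_bh()      */
		/* ... drain the reorder queues ... */
		atomic_store(&locked, false);
	}
	/* else: the current lock holder must notice our object below */
}

/* Consumer side, as at the tail of padata_reorder(): drop the lock,
 * issue a full barrier, then re-check for objects that arrived in
 * between. At least one side always sees the other's update, so no
 * object can be stranded on a reorder queue.
 */
static void reorder_side(void)
{
	atomic_store(&locked, false);			/* spin_unlock_bh() */
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb()         */
	if (atomic_load(&queued)) {
		/* queue_work(pinst->wq, &pd->reorder_work) */
	}
}
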
@@ -376,9 +325,8 @@ void padata_do_serial(struct padata_priv *padata)
 
 	cpu = get_cpu();
 
-	/* We need to run on the same CPU padata_do_parallel(.., padata, ..)
-	 * was called on -- or, at least, enqueue the padata object into the
-	 * correct per-cpu queue.
+	/* We need to enqueue the padata object into the correct
+	 * per-cpu queue.
 	 */
 	if (cpu != padata->cpu) {
 		reorder_via_wq = 1;
@@ -388,26 +336,20 @@ void padata_do_serial(struct padata_priv *padata)
 	pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
 	spin_lock(&pqueue->reorder.lock);
-	atomic_inc(&pd->reorder_objects);
 	list_add_tail(&padata->list, &pqueue->reorder.list);
+	atomic_inc(&pd->reorder_objects);
 	spin_unlock(&pqueue->reorder.lock);
 
 	/*
-	 * Ensure the atomic_inc of reorder_objects above is ordered correctly
+	 * Ensure the addition to the reorder list is ordered correctly
 	 * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
 	 * in padata_reorder.
 	 */
 	smp_mb__after_atomic();
 
 	put_cpu();
 
-	/* If we're running on the wrong CPU, call padata_reorder() via a
-	 * kernel worker.
-	 */
-	if (reorder_via_wq)
-		queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
-	else
-		padata_reorder(pd);
+	padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);
 
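For context, a sketch of where padata_do_serial() sits in a padata user's lifecycle — the my_* names are hypothetical; only the padata_priv callbacks and the two padata_do_* calls are from the real API:

struct my_request {
	struct padata_priv padata;	/* embedded; recovered via container_of */
	/* ... per-request state ... */
};

/* Runs concurrently across the parallel cpumask. */
static void my_parallel(struct padata_priv *padata)
{
	struct my_request *req = container_of(padata, struct my_request, padata);

	/* ... the expensive, parallelizable work on req ... */

	padata_do_serial(padata);	/* hand back for in-order completion */
}

/* Runs in original submission order. */
static void my_serial(struct padata_priv *padata)
{
	/* ... completion work that must preserve ordering ... */
}

/* Submission side:
 *	req->padata.parallel = my_parallel;
 *	req->padata.serial   = my_serial;
 *	err = padata_do_parallel(pinst, &req->padata, cb_cpu);
 */
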
@@ -463,14 +405,12 @@ static void padata_init_pqueues(struct parallel_data *pd)
 			continue;
 		}
 
-		pqueue->pd = pd;
 		pqueue->cpu_index = cpu_index;
 		cpu_index++;
 
 		__padata_list_init(&pqueue->reorder);
 		__padata_list_init(&pqueue->parallel);
 		INIT_WORK(&pqueue->work, padata_parallel_worker);
-		INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
 		atomic_set(&pqueue->num_obj, 0);
 	}
 }
@@ -498,12 +438,13 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 
 	padata_init_pqueues(pd);
 	padata_init_squeues(pd);
-	timer_setup(&pd->timer, padata_reorder_timer, 0);
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
 	pd->pinst = pinst;
 	spin_lock_init(&pd->lock);
+	pd->cpu = cpumask_first(pcpumask);
+	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
 
 	return pd;
 
@@ -538,8 +479,6 @@ static void padata_flush_queues(struct parallel_data *pd)
 		flush_work(&pqueue->work);
 	}
 
-	del_timer_sync(&pd->timer);
-
 	if (atomic_read(&pd->reorder_objects))
 		padata_reorder(pd);
 