@@ -210,14 +210,15 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
210210 .num_fences = 1 ,
211211 };
212212 struct drm_exec * exec = & vm_exec .exec ;
213+ struct xe_validation_ctx ctx ;
213214 struct dma_fence * pfence ;
214215 int err ;
215216 bool wait ;
216217
217218 xe_assert (vm -> xe , xe_vm_in_preempt_fence_mode (vm ));
218219
219220 down_write (& vm -> lock );
220- err = drm_gpuvm_exec_lock ( & vm_exec );
221+ err = xe_validation_exec_lock ( & ctx , & vm_exec , & vm -> xe -> val );
221222 if (err )
222223 goto out_up_write ;
223224
@@ -249,7 +250,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
249250 xe_svm_notifier_unlock (vm );
250251
251252out_fini :
252- drm_exec_fini ( exec );
253+ xe_validation_ctx_fini ( & ctx );
253254out_up_write :
254255 up_write (& vm -> lock );
255256
@@ -313,39 +314,6 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked)
313314 /* TODO: Inform user the VM is banned */
314315}
315316
316- /**
317- * xe_vm_validate_should_retry() - Whether to retry after a validate error.
318- * @exec: The drm_exec object used for locking before validation.
319- * @err: The error returned from ttm_bo_validate().
320- * @end: A ktime_t cookie that should be set to 0 before first use and
321- * that should be reused on subsequent calls.
322- *
323- * With multiple active VMs, under memory pressure, it is possible that
324- * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
325- * Until ttm properly handles locking in such scenarios, best thing the
326- * driver can do is retry with a timeout. Check if that is necessary, and
327- * if so unlock the drm_exec's objects while keeping the ticket to prepare
328- * for a rerun.
329- *
330- * Return: true if a retry after drm_exec_init() is recommended;
331- * false otherwise.
332- */
333- bool xe_vm_validate_should_retry (struct drm_exec * exec , int err , ktime_t * end )
334- {
335- ktime_t cur ;
336-
337- if (err != - ENOMEM )
338- return false;
339-
340- cur = ktime_get ();
341- * end = * end ? : ktime_add_ms (cur , XE_VM_REBIND_RETRY_TIMEOUT_MS );
342- if (!ktime_before (cur , * end ))
343- return false;
344-
345- msleep (20 );
346- return true;
347- }
348-
349317static int xe_gpuvm_validate (struct drm_gpuvm_bo * vm_bo , struct drm_exec * exec )
350318{
351319 struct xe_vm * vm = gpuvm_to_vm (vm_bo -> vm );
@@ -476,10 +444,10 @@ void xe_vm_resume_rebind_worker(struct xe_vm *vm)
476444static void preempt_rebind_work_func (struct work_struct * w )
477445{
478446 struct xe_vm * vm = container_of (w , struct xe_vm , preempt .rebind_work );
447+ struct xe_validation_ctx ctx ;
479448 struct drm_exec exec ;
480449 unsigned int fence_count = 0 ;
481450 LIST_HEAD (preempt_fences );
482- ktime_t end = 0 ;
483451 int err = 0 ;
484452 long wait ;
485453 int __maybe_unused tries = 0 ;
@@ -507,18 +475,19 @@ static void preempt_rebind_work_func(struct work_struct *w)
507475 goto out_unlock_outer ;
508476 }
509477
510- drm_exec_init (& exec , DRM_EXEC_INTERRUPTIBLE_WAIT , 0 );
478+ err = xe_validation_ctx_init (& ctx , & vm -> xe -> val , & exec ,
479+ (struct xe_val_flags ) {.interruptible = true});
480+ if (err )
481+ goto out_unlock_outer ;
511482
512483 drm_exec_until_all_locked (& exec ) {
513484 bool done = false;
514485
515486 err = xe_preempt_work_begin (& exec , vm , & done );
516487 drm_exec_retry_on_contention (& exec );
488+ xe_validation_retry_on_oom (& ctx , & err );
517489 if (err || done ) {
518- drm_exec_fini (& exec );
519- if (err && xe_vm_validate_should_retry (& exec , err , & end ))
520- err = - EAGAIN ;
521-
490+ xe_validation_ctx_fini (& ctx );
522491 goto out_unlock_outer ;
523492 }
524493 }
@@ -566,7 +535,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
566535 xe_svm_notifier_unlock (vm );
567536
568537out_unlock :
569- drm_exec_fini ( & exec );
538+ xe_validation_ctx_fini ( & ctx );
570539out_unlock_outer :
571540 if (err == - EAGAIN ) {
572541 trace_xe_vm_rebind_worker_retry (vm );
@@ -1164,20 +1133,19 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
11641133
11651134static void xe_vma_destroy_unlocked (struct xe_vma * vma )
11661135{
1136+ struct xe_device * xe = xe_vma_vm (vma )-> xe ;
1137+ struct xe_validation_ctx ctx ;
11671138 struct drm_exec exec ;
1168- int err ;
1139+ int err = 0 ;
11691140
1170- drm_exec_init (& exec , 0 , 0 );
1171- drm_exec_until_all_locked (& exec ) {
1141+ xe_validation_guard (& ctx , & xe -> val , & exec , (struct xe_val_flags ) {}, err ) {
11721142 err = xe_vm_lock_vma (& exec , vma );
11731143 drm_exec_retry_on_contention (& exec );
11741144 if (XE_WARN_ON (err ))
11751145 break ;
1146+ xe_vma_destroy (vma , NULL );
11761147 }
1177-
1178- xe_vma_destroy (vma , NULL );
1179-
1180- drm_exec_fini (& exec );
1148+ xe_assert (xe , !err );
11811149}
11821150
11831151struct xe_vma *
@@ -2383,16 +2351,17 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
23832351 struct xe_vma_mem_attr * attr , unsigned int flags )
23842352{
23852353 struct xe_bo * bo = op -> gem .obj ? gem_to_xe_bo (op -> gem .obj ) : NULL ;
2354+ struct xe_validation_ctx ctx ;
23862355 struct drm_exec exec ;
23872356 struct xe_vma * vma ;
23882357 int err = 0 ;
23892358
23902359 lockdep_assert_held_write (& vm -> lock );
23912360
23922361 if (bo ) {
2393- drm_exec_init ( & exec , DRM_EXEC_INTERRUPTIBLE_WAIT , 0 ) ;
2394- drm_exec_until_all_locked ( & exec ) {
2395- err = 0 ;
2362+ err = 0 ;
2363+ xe_validation_guard ( & ctx , & vm -> xe -> val , & exec ,
2364+ ( struct xe_val_flags ) {. interruptible = true}, err ) {
23962365 if (!bo -> vm ) {
23972366 err = drm_exec_lock_obj (& exec , xe_vm_obj (vm ));
23982367 drm_exec_retry_on_contention (& exec );
@@ -2401,27 +2370,35 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
24012370 err = drm_exec_lock_obj (& exec , & bo -> ttm .base );
24022371 drm_exec_retry_on_contention (& exec );
24032372 }
2404- if (err ) {
2405- drm_exec_fini (& exec );
2373+ if (err )
24062374 return ERR_PTR (err );
2407- }
2408- }
2409- }
2410- vma = xe_vma_create (vm , bo , op -> gem .offset ,
2411- op -> va .addr , op -> va .addr +
2412- op -> va .range - 1 , attr , flags );
2413- if (IS_ERR (vma ))
2414- goto err_unlock ;
24152375
2416- if (xe_vma_is_userptr (vma ))
2417- err = xe_vma_userptr_pin_pages (to_userptr_vma (vma ));
2418- else if (!xe_vma_has_no_bo (vma ) && !bo -> vm )
2419- err = add_preempt_fences (vm , bo );
2376+ vma = xe_vma_create (vm , bo , op -> gem .offset ,
2377+ op -> va .addr , op -> va .addr +
2378+ op -> va .range - 1 , attr , flags );
2379+ if (IS_ERR (vma ))
2380+ return vma ;
24202381
2421- err_unlock :
2422- if (bo )
2423- drm_exec_fini (& exec );
2382+ if (!bo -> vm ) {
2383+ err = add_preempt_fences (vm , bo );
2384+ if (err ) {
2385+ prep_vma_destroy (vm , vma , false);
2386+ xe_vma_destroy (vma , NULL );
2387+ }
2388+ }
2389+ }
2390+ if (err )
2391+ return ERR_PTR (err );
2392+ } else {
2393+ vma = xe_vma_create (vm , NULL , op -> gem .offset ,
2394+ op -> va .addr , op -> va .addr +
2395+ op -> va .range - 1 , attr , flags );
2396+ if (IS_ERR (vma ))
2397+ return vma ;
24242398
2399+ if (xe_vma_is_userptr (vma ))
2400+ err = xe_vma_userptr_pin_pages (to_userptr_vma (vma ));
2401+ }
24252402 if (err ) {
24262403 prep_vma_destroy (vm , vma , false);
24272404 xe_vma_destroy_unlocked (vma );
@@ -3220,37 +3197,37 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
32203197static struct dma_fence * vm_bind_ioctl_ops_execute (struct xe_vm * vm ,
32213198 struct xe_vma_ops * vops )
32223199{
3200+ struct xe_validation_ctx ctx ;
32233201 struct drm_exec exec ;
32243202 struct dma_fence * fence ;
3225- int err ;
3203+ int err = 0 ;
32263204
32273205 lockdep_assert_held_write (& vm -> lock );
32283206
3229- drm_exec_init (& exec , DRM_EXEC_INTERRUPTIBLE_WAIT |
3230- DRM_EXEC_IGNORE_DUPLICATES , 0 );
3231- drm_exec_until_all_locked (& exec ) {
3207+ xe_validation_guard (& ctx , & vm -> xe -> val , & exec ,
3208+ ((struct xe_val_flags ) {
3209+ .interruptible = true,
3210+ .exec_ignore_duplicates = true,
3211+ }), err ) {
32323212 err = vm_bind_ioctl_ops_lock_and_prep (& exec , vm , vops );
32333213 drm_exec_retry_on_contention (& exec );
3234- if (err ) {
3235- fence = ERR_PTR (err );
3236- goto unlock ;
3237- }
3214+ xe_validation_retry_on_oom (& ctx , & err );
3215+ if (err )
3216+ return ERR_PTR (err );
32383217
32393218 xe_vm_set_validation_exec (vm , & exec );
32403219 fence = ops_execute (vm , vops );
32413220 xe_vm_set_validation_exec (vm , NULL );
32423221 if (IS_ERR (fence )) {
32433222 if (PTR_ERR (fence ) == - ENODATA )
32443223 vm_bind_ioctl_ops_fini (vm , vops , NULL );
3245- goto unlock ;
3224+ return fence ;
32463225 }
32473226
32483227 vm_bind_ioctl_ops_fini (vm , vops , fence );
32493228 }
32503229
3251- unlock :
3252- drm_exec_fini (& exec );
3253- return fence ;
3230+ return err ? ERR_PTR (err ) : fence ;
32543231}
32553232ALLOW_ERROR_INJECTION (vm_bind_ioctl_ops_execute , ERRNO );
32563233
0 commit comments