@@ -308,6 +308,134 @@ module attributes {transform.with_named_sequence} {
308
308
309
309
// -----
310
310
311
+ // CHECK-LABEL: func.func @no_hoisting_unknown_bound_loop
312
+ func.func @no_hoisting_unknown_bound_loop (%memref0: memref <20 xi32 >, %lb: index , %ub: index ) {
313
+ %c0_i32 = arith.constant 0 : i32
314
+ %c0 = arith.constant 0 : index
315
+ %c1 = arith.constant 1 : index
316
+
317
+ // %lb and %ub are unknown function arguments, so the loop may execute zero times; do not hoist.
318
+ // CHECK: scf.for {{.*}} {
319
+ // CHECK-NEXT: vector.transfer_read
320
+ // CHECK-NEXT: "test.some_use"
321
+ scf.for %arg2 = %lb to %ub step %c1 {
322
+ %read = vector.transfer_read %memref0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
323
+ " test.some_use" (%read ) : (vector <4 xi32 >) ->()
324
+ }
325
+ return
326
+ }
327
+
328
+ module attributes {transform.with_named_sequence } {
329
+ transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
330
+ %0 = transform.structured.match ops {[" func.func" ]} in %arg1
331
+ : (!transform.any_op ) -> !transform.any_op
332
+ transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
333
+ : (!transform.any_op ) -> !transform.any_op
334
+ transform.yield
335
+ }
336
+ }
337
+
338
+ // -----
339
+
340
+ // CHECK-LABEL: func.func @no_hoisting_possibly_zero_trip_loop
341
+ func.func @no_hoisting_possibly_zero_trip_loop (%memref0: memref <20 xi32 >, %lb: index , %ub: index ) {
342
+ %c0_i32 = arith.constant 0 : i32
343
+ %c0 = arith.constant 0 : index
344
+ %c1 = arith.constant 1 : index
345
+
346
+ // %lb_0 = min(%lb, 8) is at most 8, and %ub_0 = max(%ub, 4) is at least 4.
347
+ // Since %lb_0 could be greater than %ub_0, do not hoist.
348
+ %lb_0 = affine.min affine_map <(d0 ) -> (d0 , 8 )>(%lb )
349
+ %ub_0 = affine.max affine_map <(d0 ) -> (d0 , 4 )>(%ub )
350
+
351
+ // CHECK: scf.for {{.*}} {
352
+ // CHECK-NEXT: vector.transfer_read
353
+ // CHECK-NEXT: "test.some_use"
354
+ scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
355
+ %read = vector.transfer_read %memref0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
356
+ " test.some_use" (%read ) : (vector <4 xi32 >) ->()
357
+ }
358
+ return
359
+ }
360
+
361
+ module attributes {transform.with_named_sequence } {
362
+ transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
363
+ %0 = transform.structured.match ops {[" func.func" ]} in %arg1
364
+ : (!transform.any_op ) -> !transform.any_op
365
+ transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
366
+ : (!transform.any_op ) -> !transform.any_op
367
+ transform.yield
368
+ }
369
+ }
370
+
371
+ // -----
372
+
373
+ // CHECK-LABEL: func.func @no_hoisting_possibly_zero_trip_loop_eq_lb_and_ub
374
+ func.func @no_hoisting_possibly_zero_trip_loop_eq_lb_and_ub (%memref0: memref <20 xi32 >, %lb: index , %ub: index ) {
375
+ %c0_i32 = arith.constant 0 : i32
376
+ %c0 = arith.constant 0 : index
377
+ %c1 = arith.constant 1 : index
378
+
379
+ // %lb_0 = min(%lb, 8) is at most 8, and %ub_0 = max(%ub, 8) is at least 8.
380
+ // Since %lb_0 could be equal to %ub_0, do not hoist.
381
+ %lb_0 = affine.min affine_map <(d0 ) -> (d0 , 8 )>(%lb )
382
+ %ub_0 = affine.max affine_map <(d0 ) -> (d0 , 8 )>(%ub )
383
+
384
+ // CHECK: scf.for {{.*}} {
385
+ // CHECK-NEXT: vector.transfer_read
386
+ // CHECK-NEXT: "test.some_use"
387
+ scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
388
+ %read = vector.transfer_read %memref0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
389
+ " test.some_use" (%read ) : (vector <4 xi32 >) ->()
390
+ }
391
+ return
392
+ }
393
+
394
+ module attributes {transform.with_named_sequence } {
395
+ transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
396
+ %0 = transform.structured.match ops {[" func.func" ]} in %arg1
397
+ : (!transform.any_op ) -> !transform.any_op
398
+ transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
399
+ : (!transform.any_op ) -> !transform.any_op
400
+ transform.yield
401
+ }
402
+ }
403
+
404
+ // -----
405
+
406
+ // CHECK-LABEL: func.func @hoisting_non_zero_trip_loop
407
+ func.func @hoisting_non_zero_trip_loop (%memref0: memref <20 xi32 >, %lb: index , %ub: index ) {
408
+ %c0_i32 = arith.constant 0 : i32
409
+ %c0 = arith.constant 0 : index
410
+ %c1 = arith.constant 1 : index
411
+
412
+ // %lb_0 = min(%lb, 4) is at most 4, and %ub_0 = max(%ub, 8) is at least 8.
413
+ // Since %lb_0 is guaranteed to be less than %ub_0, hoisting is possible.
414
+ %lb_0 = affine.min affine_map <(d0 ) -> (d0 , 4 )>(%lb )
415
+ %ub_0 = affine.max affine_map <(d0 ) -> (d0 , 8 )>(%ub )
416
+
417
+ // CHECK: vector.transfer_read
418
+ // CHECK: scf.for {{.*}} {
419
+ // CHECK-NEXT: "test.some_use"
420
+ scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
421
+ %read = vector.transfer_read %memref0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
422
+ " test.some_use" (%read ) : (vector <4 xi32 >) ->()
423
+ }
424
+ return
425
+ }
426
+
427
+ module attributes {transform.with_named_sequence } {
428
+ transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
429
+ %0 = transform.structured.match ops {[" func.func" ]} in %arg1
430
+ : (!transform.any_op ) -> !transform.any_op
431
+ transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
432
+ : (!transform.any_op ) -> !transform.any_op
433
+ transform.yield
434
+ }
435
+ }
436
+
437
+ // -----
438
+
311
439
// Regression test - `vector.transfer_read` below should not be hoisted.
312
440
// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
313
441
// (read by `vector.transfer_read`) alias.
@@ -366,7 +494,7 @@ func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
366
494
%collapse_shape = memref.collapse_shape %alloca [[0 , 1 , 2 ]] : memref <1 x12 x1 xi32 > into memref <12 xi32 >
367
495
vector.transfer_write %vec , %alloca [%c0 , %c0 , %c0 ] {in_bounds = [true , true , true ]} : vector <1 x12 x1 xi32 >, memref <1 x12 x1 xi32 >
368
496
%read = vector.transfer_read %collapse_shape [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <12 xi32 >, vector <12 xi32 >
369
- " prevent.dce " (%read ) : (vector <12 xi32 >) ->()
497
+ " test.some_use " (%read ) : (vector <12 xi32 >) ->()
370
498
}
371
499
return
372
500
}
0 commit comments