@@ -3463,6 +3463,81 @@ bb:
3463
3463
ret void
3464
3464
}
3465
3465
3466
+ ; Test a case that failed machine verification: init.exec with a zero mask followed by a three-way switch whose targets all just return.
3467
+ define amdgpu_gs void @wqm_init_exec_switch (i32 %arg ) {
3468
+ ; GFX9-W64-LABEL: wqm_init_exec_switch:
3469
+ ; GFX9-W64: ; %bb.0:
3470
+ ; GFX9-W64-NEXT: s_mov_b64 exec, 0
3471
+ ; GFX9-W64-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
3472
+ ; GFX9-W64-NEXT: s_and_saveexec_b64 s[0:1], vcc
3473
+ ; GFX9-W64-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
3474
+ ; GFX9-W64-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
3475
+ ; GFX9-W64-NEXT: s_endpgm
3476
+ ;
3477
+ ; GFX10-W32-LABEL: wqm_init_exec_switch:
3478
+ ; GFX10-W32: ; %bb.0:
3479
+ ; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
3480
+ ; GFX10-W32-NEXT: s_mov_b32 s0, exec_lo
3481
+ ; GFX10-W32-NEXT: v_cmpx_lt_i32_e32 0, v0
3482
+ ; GFX10-W32-NEXT: s_xor_b32 s0, exec_lo, s0
3483
+ ; GFX10-W32-NEXT: s_andn2_saveexec_b32 s0, s0
3484
+ ; GFX10-W32-NEXT: s_endpgm
3485
+ call void @llvm.amdgcn.init.exec (i64 0 ) ; zero mask; shows up as the first instruction (write of 0 to exec) in both expected outputs above
3486
+ switch i32 %arg , label %bb1 [ ; default goes to bb1; every target below does nothing but return
3487
+ i32 0 , label %bb3
3488
+ i32 1 , label %bb2
3489
+ ]
3490
+ bb1:
3491
+ ret void
3492
+ bb2:
3493
+ ret void
3494
+ bb3:
3495
+ ret void
3496
+ }
3497
+
3498
+ define amdgpu_gs void @wqm_init_exec_wwm () { ; init.exec with a zero mask, then ballot and wwm results are compared and the outcome is passed to an export
3499
+ ; GFX9-W64-LABEL: wqm_init_exec_wwm:
3500
+ ; GFX9-W64: ; %bb.0:
3501
+ ; GFX9-W64-NEXT: s_mov_b64 exec, 0
3502
+ ; GFX9-W64-NEXT: s_mov_b32 s1, 0
3503
+ ; GFX9-W64-NEXT: s_mov_b32 s0, s1
3504
+ ; GFX9-W64-NEXT: s_cmp_lg_u64 exec, 0
3505
+ ; GFX9-W64-NEXT: s_cselect_b64 s[2:3], -1, 0
3506
+ ; GFX9-W64-NEXT: s_cmp_lg_u64 s[0:1], 0
3507
+ ; GFX9-W64-NEXT: s_cselect_b64 s[0:1], -1, 0
3508
+ ; GFX9-W64-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
3509
+ ; GFX9-W64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
3510
+ ; GFX9-W64-NEXT: v_mov_b32_e32 v1, 0
3511
+ ; GFX9-W64-NEXT: exp mrt0 off, off, off, off
3512
+ ; GFX9-W64-NEXT: s_endpgm
3513
+ ;
3514
+ ; GFX10-W32-LABEL: wqm_init_exec_wwm:
3515
+ ; GFX10-W32: ; %bb.0:
3516
+ ; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
3517
+ ; GFX10-W32-NEXT: s_mov_b32 s1, 0
3518
+ ; GFX10-W32-NEXT: s_cmp_lg_u64 exec, 0
3519
+ ; GFX10-W32-NEXT: s_mov_b32 s0, s1
3520
+ ; GFX10-W32-NEXT: s_cselect_b32 s2, -1, 0
3521
+ ; GFX10-W32-NEXT: s_cmp_lg_u64 s[0:1], 0
3522
+ ; GFX10-W32-NEXT: v_mov_b32_e32 v1, 0
3523
+ ; GFX10-W32-NEXT: s_cselect_b32 s0, -1, 0
3524
+ ; GFX10-W32-NEXT: s_xor_b32 s0, s2, s0
3525
+ ; GFX10-W32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
3526
+ ; GFX10-W32-NEXT: exp mrt0 off, off, off, off
3527
+ ; GFX10-W32-NEXT: s_endpgm
3528
+ call void @llvm.amdgcn.init.exec (i64 0 ) ; zero mask; first instruction in both expected outputs writes 0 to exec
3529
+ %i = call i64 @llvm.amdgcn.ballot.i64 (i1 true ) ; ballot of constant true; the expected asm tests exec against 0 for the compare on this value
3530
+ %i1 = call i32 @llvm.amdgcn.wwm.i32 (i32 0 ) ; wwm applied to constant 0
3531
+ %i2 = insertelement <2 x i32 > zeroinitializer , i32 %i1 , i64 0 ; element 0 = %i1, element 1 stays 0
3532
+ %i3 = bitcast <2 x i32 > %i2 to i64 ; view the pair as one i64 so it can be compared like %i
3533
+ %i4 = icmp ne i64 %i , 0
3534
+ %i5 = icmp ne i64 %i3 , 0
3535
+ %i6 = xor i1 %i4 , %i5 ; true when exactly one of the two values is nonzero
3536
+ %i7 = uitofp i1 %i6 to float ; 0.0 or 1.0 (the v_cndmask in the expected asm)
3537
+ call void @llvm.amdgcn.exp.f32 (i32 0 , i32 0 , float %i7 , float 0 .0 , float 0 .0 , float 0 .0 , i1 false , i1 false ) ; both leading i32 operands are 0; expected asm shows an mrt0 export with all four channels off
3538
+ ret void
3539
+ }
3540
+
3466
3541
declare void @llvm.amdgcn.exp.f32 (i32 , i32 , float , float , float , float , i1 , i1 ) #1
3467
3542
declare void @llvm.amdgcn.image.store.1d.v4f32.i32 (<4 x float >, i32 , i32 , <8 x i32 >, i32 , i32 ) #1
3468
3543
0 commit comments