@@ -154,3 +154,27 @@ define float @fold_fmul_distributive(float %x, float %y) {
154
154
%fmul = fmul contract float %fadd , %x
155
155
ret float %fmul
156
156
}
157
+
158
+ ; Test that the contract flag is not dropped, so that the following fmul/fadd pair can be fused into an fma.
159
+ define amdgpu_kernel void @vec_mul_scalar_add_fma (<2 x float > %a , <2 x float > %b , float %c1 , ptr addrspace (1 ) %inptr ) { ; kernel under test: stores a[0] * b[0] + c1 to the float at index 1 of %inptr
160
+ ; GFX906-LABEL: vec_mul_scalar_add_fma:
161
+ ; GFX906: ; %bb.0:
162
+ ; GFX906-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
163
+ ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
164
+ ; GFX906-NEXT: s_load_dword s5, s[0:1], 0x34
165
+ ; GFX906-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x3c
166
+ ; GFX906-NEXT: v_mov_b32_e32 v0, 0
167
+ ; GFX906-NEXT: v_mov_b32_e32 v1, s6
168
+ ; GFX906-NEXT: v_mul_f32_e32 v1, s4, v1
169
+ ; GFX906-NEXT: s_waitcnt lgkmcnt(0)
170
+ ; GFX906-NEXT: v_add_f32_e32 v1, s5, v1
171
+ ; GFX906-NEXT: global_store_dword v0, v1, s[2:3] offset:4
172
+ ; GFX906-NEXT: s_endpgm
173
+ %gep = getelementptr float , ptr addrspace (1 ) %inptr , i32 1 ; address of the float one element past %inptr (the store target)
174
+ %c = shufflevector <2 x float > %a , <2 x float > poison, <2 x i32 > zeroinitializer ; all-zero mask: broadcast element 0 of %a into both lanes
175
+ %mul = fmul contract <2 x float > %c , %b ; vector multiply carrying the contract flag
176
+ %elv = extractelement <2 x float > %mul , i64 0 ; only lane 0 of the product is used, i.e. a[0] * b[0]
177
+ %add = fadd contract float %elv , %c1 ; scalar add, also contract: together with the fmul this is the pair that may be fused into an fma
178
+ store float %add , ptr addrspace (1 ) %gep , align 4 ; write the result to the address computed in %gep
179
+ ret void ; NOTE(review): the checks above expect separate v_mul_f32 / v_add_f32 rather than a fused op - confirm this is the intended baseline
180
+ }
0 commit comments