@@ -8,29 +8,49 @@ typedef short __attribute__((ext_vector_type(2))) short2;
// Two-element vector of unsigned short, declared with the ext_vector_type attribute.
88typedef unsigned short __attribute__((ext_vector_type (2 ))) ushort2 ;
99
1010// CHECK-LABEL: @builtins_amdgcn_dl_insts
11- // CHECK: call float @llvm.amdgcn.fdot2
11+ // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 false)
12+ // CHECK: call float @llvm.amdgcn.fdot2(<2 x half> %v2hA, <2 x half> %v2hB, float %fC, i1 true)
1213
13- // CHECK: call i32 @llvm.amdgcn.sdot2
14- // CHECK: call i32 @llvm.amdgcn.udot2
14+ // CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 false)
15+ // CHECK: call i32 @llvm.amdgcn.sdot2(<2 x i16> %v2ssA, <2 x i16> %v2ssB, i32 %siC, i1 true)
1516
16- // CHECK: call i32 @llvm.amdgcn.sdot4
17- // CHECK: call i32 @llvm.amdgcn.udot4
17+ // CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 false)
18+ // CHECK: call i32 @llvm.amdgcn.udot2(<2 x i16> %v2usA, <2 x i16> %v2usB, i32 %uiC, i1 true)
1819
19- // CHECK: call i32 @llvm.amdgcn.sdot8
20- // CHECK: call i32 @llvm.amdgcn.udot8
20+ // CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 false)
21+ // CHECK: call i32 @llvm.amdgcn.sdot4(i32 %siA, i32 %siB, i32 %siC, i1 true)
22+
23+ // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
24+ // CHECK: call i32 @llvm.amdgcn.udot4(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
25+
26+ // CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 false)
27+ // CHECK: call i32 @llvm.amdgcn.sdot8(i32 %siA, i32 %siB, i32 %siC, i1 true)
28+
29+ // CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 false)
30+ // CHECK: call i32 @llvm.amdgcn.udot8(i32 %uiA, i32 %uiB, i32 %uiC, i1 true)
// Test kernel for the dot-product builtins (shown here as a diff: lines marked '-' are the
// old three-argument calls, lines marked '+' are the new four-argument calls).
// In the updated version each of fdot2, sdot2, udot2, sdot4, udot4, sdot8 and udot8 is
// called twice with the same operands, passing false and then true as the fourth argument,
// and every result is stored to its own slot of fOut, siOut or uiOut.
// The call order matches the order of the expected-IR patterns listed before the kernel.
// NOTE(review): the fourth argument is presumably a clamp flag; confirm against the builtin
// definitions, since this file does not state it.
2131kernel void builtins_amdgcn_dl_insts (
2232 global float * fOut , global int * siOut , global uint * uiOut ,
2333 half2 v2hA , half2 v2hB , float fC ,
2434 short2 v2ssA , short2 v2ssB , int siA , int siB , int siC ,
2535 ushort2 v2usA , ushort2 v2usB , uint uiA , uint uiB , uint uiC ) {
26- fOut [0 ] = __builtin_amdgcn_fdot2 (v2hA , v2hB , fC );
36+ fOut [0 ] = __builtin_amdgcn_fdot2 (v2hA , v2hB , fC , false);
37+ fOut [1 ] = __builtin_amdgcn_fdot2 (v2hA , v2hB , fC , true);
38+
39+ siOut [0 ] = __builtin_amdgcn_sdot2 (v2ssA , v2ssB , siC , false);
40+ siOut [1 ] = __builtin_amdgcn_sdot2 (v2ssA , v2ssB , siC , true);
41+
42+ uiOut [0 ] = __builtin_amdgcn_udot2 (v2usA , v2usB , uiC , false);
43+ uiOut [1 ] = __builtin_amdgcn_udot2 (v2usA , v2usB , uiC , true);
44+
45+ siOut [2 ] = __builtin_amdgcn_sdot4 (siA , siB , siC , false);
46+ siOut [3 ] = __builtin_amdgcn_sdot4 (siA , siB , siC , true);
2747
28- siOut [ 0 ] = __builtin_amdgcn_sdot2 ( v2ssA , v2ssB , siC );
29- uiOut [0 ] = __builtin_amdgcn_udot2 ( v2usA , v2usB , uiC );
48+ uiOut [ 2 ] = __builtin_amdgcn_udot4 ( uiA , uiB , uiC , false );
49+ uiOut [3 ] = __builtin_amdgcn_udot4 ( uiA , uiB , uiC , true );
3050
31- siOut [1 ] = __builtin_amdgcn_sdot4 (siA , siB , siC );
32- uiOut [ 1 ] = __builtin_amdgcn_udot4 ( uiA , uiB , uiC );
51+ siOut [4 ] = __builtin_amdgcn_sdot8 (siA , siB , siC , false );
52+ siOut [ 5 ] = __builtin_amdgcn_sdot8 ( siA , siB , siC , true );
3353
34- siOut [ 2 ] = __builtin_amdgcn_sdot8 ( siA , siB , siC );
35- uiOut [2 ] = __builtin_amdgcn_udot8 (uiA , uiB , uiC );
54+ uiOut [ 4 ] = __builtin_amdgcn_udot8 ( uiA , uiB , uiC , false );
55+ uiOut [5 ] = __builtin_amdgcn_udot8 (uiA , uiB , uiC , true );
3656}
0 commit comments