@@ -2794,6 +2794,41 @@ define hidden void @extract3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, pt
2794
2794
ret void
2795
2795
}
2796
2796
2797
+ declare i32 @llvm.amdgcn.perm (i32 , i32 , i32 ) ; perm intrinsic (three i32 in, one i32 out) called twice by @extract_perm_3744 below; both calls pass a constant as the third operand
2798
+
2799
+ define hidden void @extract_perm_3744 (ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , ptr addrspace (1 ) %out0 ) { ; Codegen test: the OR of the two llvm.amdgcn.perm calls in this body is expected to be emitted as the single v_perm_b32 with selector 0x3070404 that the check lines below assert
2800
+ ; GFX10-LABEL: extract_perm_3744:
2801
+ ; GFX10: ; %bb.0:
2802
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2803
+ ; GFX10-NEXT: global_load_dword v6, v[0:1], off
2804
+ ; GFX10-NEXT: global_load_dword v7, v[2:3], off
2805
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
2806
+ ; GFX10-NEXT: v_perm_b32 v0, v6, v7, 0x3070404
2807
+ ; GFX10-NEXT: global_store_dword v[4:5], v0, off
2808
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
2809
+ ;
2810
+ ; GFX9-LABEL: extract_perm_3744:
2811
+ ; GFX9: ; %bb.0:
2812
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2813
+ ; GFX9-NEXT: global_load_dword v6, v[0:1], off
2814
+ ; GFX9-NEXT: global_load_dword v7, v[2:3], off
2815
+ ; GFX9-NEXT: s_mov_b32 s4, 0x3070404
2816
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
2817
+ ; GFX9-NEXT: v_perm_b32 v0, v6, v7, s4
2818
+ ; GFX9-NEXT: global_store_dword v[4:5], v0, off
2819
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
2820
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
2821
+ %vec1 = load <4 x i8 >, ptr addrspace (1 ) %in0 , align 4 ; first source: four bytes read from %in0
2822
+ %vec2 = load <4 x i8 >, ptr addrspace (1 ) %in1 , align 4 ; second source: four bytes read from %in1
2823
+ %cast1 = bitcast <4 x i8 > %vec1 to i32 ; view the first source as one i32 so it can be passed to the perm intrinsic
2824
+ %cast2 = bitcast <4 x i8 > %vec2 to i32 ; same reinterpretation for the second source
2825
+ %lo24 = call i32 @llvm.amdgcn.perm (i32 %cast1 , i32 %cast1 , i32 201523200 ) ; selector 201523200 = 0x0C030000, applied to %cast1 only; NOTE(review): presumably yields bytes {0, cast1[3], cast1[0], cast1[0]} from high to low - confirm against the v_perm_b32 selector encoding
2826
+ %hi8 = call i32 @llvm.amdgcn.perm (i32 %cast2 , i32 %cast2 , i32 51121164 ) ; selector 51121164 = 0x030C0C0C, applied to %cast2 only; NOTE(review): presumably keeps byte 3 of %cast2 and zeroes the other three - confirm against the v_perm_b32 selector encoding
2827
+ %res = or i32 %hi8 , %lo24 ; merge the two partial permutes; the check lines above expect no separate OR instruction, only one v_perm_b32
2828
+ store i32 %res , ptr addrspace (1 ) %out0 , align 4 ; write the merged dword to %out0
2829
+ ret void
2830
+ }
2831
+
2797
2832
define hidden void @extract1347_v2i16 (ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , ptr addrspace (1 ) %out0 ) {
2798
2833
; GFX10-LABEL: extract1347_v2i16:
2799
2834
; GFX10: ; %bb.0:
0 commit comments