@@ -1061,3 +1061,133 @@ bb:
10611061 ret <4 x i1 > %tmp4
10621062}
10631063
1064+ ; Regression reported on 057db2002bb3d79429db3c5fe436c8cefc50cb25
; External 128-bit global; the store at the end of the test reaches it through
; a constant-expression address (ptrtoint/xor/inttoptr), not directly.
1065+ @d = external global <2 x i64 >, align 16
1066+ define void @constantfold_andn_mask () nounwind {
; NOTE(review): the per-target CHECK lines below are autogenerated
; (update_llc_test_checks.py style) and must match llc output exactly —
; do not hand-edit them; regenerate instead.
1067+ ; SSE-LABEL: constantfold_andn_mask:
1068+ ; SSE: # %bb.0: # %entry
1069+ ; SSE-NEXT: pushq %rax
1070+ ; SSE-NEXT: callq use@PLT
1071+ ; SSE-NEXT: movdqu (%rax), %xmm1
1072+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
1073+ ; SSE-NEXT: pand %xmm2, %xmm0
1074+ ; SSE-NEXT: pavgb %xmm2, %xmm0
1075+ ; SSE-NEXT: pandn %xmm1, %xmm0
1076+ ; SSE-NEXT: pand %xmm2, %xmm1
1077+ ; SSE-NEXT: pandn %xmm0, %xmm2
1078+ ; SSE-NEXT: por %xmm1, %xmm2
1079+ ; SSE-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
1080+ ; SSE-NEXT: xorq d@GOTPCREL(%rip), %rax
1081+ ; SSE-NEXT: movdqa %xmm2, (%rax)
1082+ ; SSE-NEXT: popq %rax
1083+ ; SSE-NEXT: retq
1084+ ;
1085+ ; XOP-LABEL: constantfold_andn_mask:
1086+ ; XOP: # %bb.0: # %entry
1087+ ; XOP-NEXT: pushq %rax
1088+ ; XOP-NEXT: callq use@PLT
1089+ ; XOP-NEXT: vmovdqu (%rax), %xmm1
1090+ ; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
1091+ ; XOP-NEXT: vpand %xmm2, %xmm1, %xmm3
1092+ ; XOP-NEXT: vpand %xmm2, %xmm0, %xmm0
1093+ ; XOP-NEXT: vpavgb %xmm2, %xmm0, %xmm0
1094+ ; XOP-NEXT: vpandn %xmm1, %xmm0, %xmm0
1095+ ; XOP-NEXT: vpandn %xmm0, %xmm2, %xmm0
1096+ ; XOP-NEXT: vpor %xmm0, %xmm3, %xmm0
1097+ ; XOP-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
1098+ ; XOP-NEXT: xorq d@GOTPCREL(%rip), %rax
1099+ ; XOP-NEXT: vmovdqa %xmm0, (%rax)
1100+ ; XOP-NEXT: popq %rax
1101+ ; XOP-NEXT: retq
1102+ ;
1103+ ; AVX1-LABEL: constantfold_andn_mask:
1104+ ; AVX1: # %bb.0: # %entry
1105+ ; AVX1-NEXT: pushq %rax
1106+ ; AVX1-NEXT: callq use@PLT
1107+ ; AVX1-NEXT: vmovdqu (%rax), %xmm1
1108+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
1109+ ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm3
1110+ ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1111+ ; AVX1-NEXT: vpavgb %xmm2, %xmm0, %xmm0
1112+ ; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
1113+ ; AVX1-NEXT: vpandn %xmm0, %xmm2, %xmm0
1114+ ; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
1115+ ; AVX1-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
1116+ ; AVX1-NEXT: xorq d@GOTPCREL(%rip), %rax
1117+ ; AVX1-NEXT: vmovdqa %xmm0, (%rax)
1118+ ; AVX1-NEXT: popq %rax
1119+ ; AVX1-NEXT: retq
1120+ ;
1121+ ; AVX2-LABEL: constantfold_andn_mask:
1122+ ; AVX2: # %bb.0: # %entry
1123+ ; AVX2-NEXT: pushq %rax
1124+ ; AVX2-NEXT: callq use@PLT
1125+ ; AVX2-NEXT: vmovdqu (%rax), %xmm1
1126+ ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
1127+ ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
1128+ ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
1129+ ; AVX2-NEXT: vpavgb %xmm2, %xmm0, %xmm0
1130+ ; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
1131+ ; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
1132+ ; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0
1133+ ; AVX2-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
1134+ ; AVX2-NEXT: xorq d@GOTPCREL(%rip), %rax
1135+ ; AVX2-NEXT: vmovdqa %xmm0, (%rax)
1136+ ; AVX2-NEXT: popq %rax
1137+ ; AVX2-NEXT: retq
1138+ ;
1139+ ; AVX512F-LABEL: constantfold_andn_mask:
1140+ ; AVX512F: # %bb.0: # %entry
1141+ ; AVX512F-NEXT: pushq %rax
1142+ ; AVX512F-NEXT: callq use@PLT
1143+ ; AVX512F-NEXT: vmovdqu (%rax), %xmm1
1144+ ; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
1145+ ; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
1146+ ; AVX512F-NEXT: vpavgb %xmm2, %xmm0, %xmm0
1147+ ; AVX512F-NEXT: vpandn %xmm1, %xmm0, %xmm0
1148+ ; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0
1149+ ; AVX512F-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
1150+ ; AVX512F-NEXT: xorq d@GOTPCREL(%rip), %rax
1151+ ; AVX512F-NEXT: vmovdqa %xmm0, (%rax)
1152+ ; AVX512F-NEXT: popq %rax
1153+ ; AVX512F-NEXT: vzeroupper
1154+ ; AVX512F-NEXT: retq
1155+ ;
1156+ ; AVX512VL-LABEL: constantfold_andn_mask:
1157+ ; AVX512VL: # %bb.0: # %entry
1158+ ; AVX512VL-NEXT: pushq %rax
1159+ ; AVX512VL-NEXT: callq use@PLT
1160+ ; AVX512VL-NEXT: vmovdqu (%rax), %xmm1
1161+ ; AVX512VL-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
1162+ ; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
1163+ ; AVX512VL-NEXT: vpavgb %xmm2, %xmm0, %xmm0
1164+ ; AVX512VL-NEXT: vpandn %xmm1, %xmm0, %xmm0
1165+ ; AVX512VL-NEXT: vpternlogq $216, %xmm2, %xmm1, %xmm0
1166+ ; AVX512VL-NEXT: movabsq $87960930222080, %rax # imm = 0x500000000000
1167+ ; AVX512VL-NEXT: xorq d@GOTPCREL(%rip), %rax
1168+ ; AVX512VL-NEXT: vmovdqa %xmm0, (%rax)
1169+ ; AVX512VL-NEXT: popq %rax
1170+ ; AVX512VL-NEXT: retq
; Reduced reproducer. NOTE(review): the undef load plus the
; 0x500000000000 address xor on the final store look like
; MemorySanitizer-instrumented code — TODO confirm against the commit
; referenced above.
; Key property being pinned: %i8 = xor 0, -1 is the all-ones constant,
; so %i10 folds to %i5 and %i12 folds to %i10; constant-folding this
; ANDN-style mask chain must not miscompile the remaining and/xor/or
; combination.
1171+ entry:
1172+ %call = call noundef <2 x i64 > @use ()
1173+ %_msret = load <2 x i64 >, ptr undef , align 8
1174+ %i = bitcast <2 x i64 > %_msret to <16 x i8 >
1175+ %i1 = bitcast <2 x i64 > %call to <16 x i8 >
1176+ %i2 = and <16 x i8 > %i , <i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 >
1177+ %i3 = and <16 x i8 > %i1 , <i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 >
1178+ %i4 = call <16 x i8 > @llvm.x86.sse2.pavg.b (<16 x i8 > <i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 , i8 31 , i8 -8 >, <16 x i8 > %i3 )
1179+ %i5 = bitcast <16 x i8 > %i2 to <2 x i64 >
1180+ %i6 = bitcast <16 x i8 > %i4 to <2 x i64 >
; 567462211834873824 == 0x07E007E007E007E0 — same <31,-8> byte mask
; viewed as two i64 lanes after the bitcasts above.
1181+ %i7 = and <2 x i64 > %_msret , <i64 567462211834873824 , i64 567462211834873824 >
; %i8 is xor(0, -1) == all-ones; %i9 is not(%i6).
1182+ %i8 = xor <2 x i64 > zeroinitializer , <i64 -1 , i64 -1 >
1183+ %i9 = xor <2 x i64 > %i6 , <i64 -1 , i64 -1 >
1184+ %i10 = and <2 x i64 > %i8 , %i5
1185+ %i11 = and <2 x i64 > %i7 , %i9
1186+ %i12 = or <2 x i64 > zeroinitializer , %i10
1187+ %i13 = or <2 x i64 > %i12 , %i11
; Store through @d's address xor'd with 0x500000000000, expressed as a
; single constant expression so no extra IR instructions are needed.
1188+ store <2 x i64 > %i13 , ptr inttoptr (i64 xor (i64 ptrtoint (ptr @d to i64 ), i64 87960930222080 ) to ptr ), align 16
1189+ ret void
1190+ }
1191+
; Opaque external producer of the tested value — being external keeps the
; call (and the CHECK'd callq/PLT sequence) from being folded away.
1192+ declare <2 x i64 > @use ()
; x86 SSE2 packed byte average: per-lane (a + b + 1) >> 1 (maps to pavgb).
1193+ declare <16 x i8 > @llvm.x86.sse2.pavg.b (<16 x i8 >, <16 x i8 >)