Skip to content

Commit 108d32a

Browse files
committed
[X86] X86FixupVectorConstants - load+sign-extend vector constants that can be stored in a truncated form
Reduce the size of the vector constant by storing it in the constant pool in a truncated form, and sign-extend it as part of the load. I intend to add the matching load+zero-extend handling in a future patch, but that requires some alterations to the existing MC shuffle comments handling first. I've extended the existing FixupConstant functionality to support these constant rebuilds as well - we still select the smallest stored constant entry and prefer vzload/broadcast/vextload for same bitwidth to avoid domain flips. NOTE: Some of the FixupConstant tables are currently created on the fly as they are dependent on the supported ISAs (HasAVX2 etc.) - should we split these (to allow initializer lists instead) and have duplicate FixupConstant calls to avoid so much stack use?
1 parent 40f6b7d commit 108d32a

File tree

223 files changed

+15235
-15523
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

223 files changed

+15235
-15523
lines changed

llvm/lib/Target/X86/X86FixupVectorConstants.cpp

+207-85
Large diffs are not rendered by default.

llvm/lib/Target/X86/X86MCInstLower.cpp

+61-1
Original file line numberDiff line numberDiff line change
@@ -1582,6 +1582,36 @@ static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer,
15821582
}
15831583
}
15841584

1585+
// Emits an assembly comment of the form "<dst-reg> = [e0,e1,...]" describing
// the sign-extended value of a constant-pool operand of MI.
//
// MI           - instruction whose constant operand is inspected.
// OutStreamer  - streamer that receives the comment via AddComment().
// SrcEltBits   - element width (in bits) the stored constant must have.
// DstEltBits   - element width (in bits) each element is sign-extended to.
//
// Returns true if a comment was emitted; false if no constant was found, its
// scalar width differs from SrcEltBits, or it is not a
// ConstantDataSequential.
static bool printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer,
1586+
int SrcEltBits, int DstEltBits) {
1587+
// NOTE(review): the literal 1 is presumably the operand index where the
// memory reference starts - confirm against X86::getConstantFromPool.
auto *C = X86::getConstantFromPool(*MI, 1);
1588+
// Only handle constants whose element width matches the expected source
// width. NOTE(review): this compares an int against what is presumably an
// unsigned return value - confirm no sign-compare warning is raised.
if (C && C->getType()->getScalarSizeInBits() == SrcEltBits) {
1589+
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
1590+
int NumElts = CDS->getNumElements();
1591+
std::string Comment;
1592+
raw_string_ostream CS(Comment);
1593+
1594+
// Operand 0 is treated as the destination register and printed first.
const MachineOperand &DstOp = MI->getOperand(0);
1595+
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
1596+
CS << "[";
1597+
for (int i = 0; i != NumElts; ++i) {
1598+
// Comma-separate every element after the first.
if (i != 0)
1599+
CS << ",";
1600+
if (CDS->getElementType()->isIntegerTy()) {
1601+
// Print the element as it will appear after the extending load.
APInt Elt = CDS->getElementAsAPInt(i).sext(DstEltBits);
1602+
printConstant(Elt, CS);
1603+
} else
1604+
// Non-integer element types are not decoded; emit a placeholder.
CS << "?";
1605+
}
1606+
CS << "]";
1607+
OutStreamer.AddComment(CS.str());
1608+
return true;
1609+
}
1610+
}
1611+
1612+
// No suitable constant was found, so no comment was added.
return false;
1613+
}
1614+
15851615
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
15861616
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
15871617
assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) &&
@@ -1844,7 +1874,7 @@ static void addConstantComments(const MachineInstr *MI,
18441874
case X86::VMOVQI2PQIrm:
18451875
case X86::VMOVQI2PQIZrm:
18461876
printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero");
1847-
break;
1877+
break;
18481878

18491879
case X86::MOVSSrm:
18501880
case X86::VMOVSSrm:
@@ -1979,6 +2009,36 @@ static void addConstantComments(const MachineInstr *MI,
19792009
case X86::VPBROADCASTBZrm:
19802010
printBroadcast(MI, OutStreamer, 64, 8);
19812011
break;
2012+
2013+
// Expands to a single switch label of the form
// `case X86::<Prefix>PMOV<Ext><Type><Suffix>rm:`.
#define MOVX_CASE(Prefix, Ext, Type, Suffix) \
2014+
case X86::Prefix##PMOV##Ext##Type##Suffix##rm:
2015+
2016+
// Expands to six consecutive case labels for one Ext/Type pair: the
// unprefixed opcode, the V-prefixed opcode, and the V-prefixed opcodes with
// the Y, Z128, Z256 and Z suffixes.
#define CASE_MOVX_RM(Ext, Type) \
2017+
MOVX_CASE(, Ext, Type, ) \
2018+
MOVX_CASE(V, Ext, Type, ) \
2019+
MOVX_CASE(V, Ext, Type, Y) \
2020+
MOVX_CASE(V, Ext, Type, Z128) \
2021+
MOVX_CASE(V, Ext, Type, Z256) \
2022+
MOVX_CASE(V, Ext, Type, Z)
2023+
2024+
CASE_MOVX_RM(SX, BD)
2025+
printSignExtend(MI, OutStreamer, 8, 32);
2026+
break;
2027+
CASE_MOVX_RM(SX, BQ)
2028+
printSignExtend(MI, OutStreamer, 8, 64);
2029+
break;
2030+
CASE_MOVX_RM(SX, BW)
2031+
printSignExtend(MI, OutStreamer, 8, 16);
2032+
break;
2033+
CASE_MOVX_RM(SX, DQ)
2034+
printSignExtend(MI, OutStreamer, 32, 64);
2035+
break;
2036+
CASE_MOVX_RM(SX, WD)
2037+
printSignExtend(MI, OutStreamer, 16, 32);
2038+
break;
2039+
CASE_MOVX_RM(SX, WQ)
2040+
printSignExtend(MI, OutStreamer, 16, 64);
2041+
break;
19822042
}
19832043
}
19842044

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

+53-57
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

+47-49
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/avg.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,7 @@ define void @avg_v16i16_const(ptr %a) nounwind {
11911191
;
11921192
; AVX1-LABEL: avg_v16i16_const:
11931193
; AVX1: # %bb.0:
1194-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
1194+
; AVX1-NEXT: vpmovsxbw {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
11951195
; AVX1-NEXT: vpavgw (%rdi), %xmm0, %xmm1
11961196
; AVX1-NEXT: vpavgw 16(%rdi), %xmm0, %xmm0
11971197
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
@@ -1241,7 +1241,7 @@ define void @avg_v32i16_const(ptr %a) nounwind {
12411241
;
12421242
; AVX1-LABEL: avg_v32i16_const:
12431243
; AVX1: # %bb.0:
1244-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
1244+
; AVX1-NEXT: vpmovsxbw {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
12451245
; AVX1-NEXT: vpavgw (%rdi), %xmm0, %xmm1
12461246
; AVX1-NEXT: vpavgw 16(%rdi), %xmm0, %xmm2
12471247
; AVX1-NEXT: vpavgw 32(%rdi), %xmm0, %xmm3

llvm/test/CodeGen/X86/avx-vperm2x128.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ entry:
651651
define <8 x i32> @ld0_hi0_lo1_8i32(ptr %pa, <8 x i32> %b) nounwind uwtable readnone ssp {
652652
; AVX1-LABEL: ld0_hi0_lo1_8i32:
653653
; AVX1: # %bb.0: # %entry
654-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4]
654+
; AVX1-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,3,4]
655655
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
656656
; AVX1-NEXT: vpaddd 16(%rdi), %xmm1, %xmm1
657657
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -672,7 +672,7 @@ entry:
672672
define <8 x i32> @ld1_hi0_hi1_8i32(<8 x i32> %a, ptr %pb) nounwind uwtable readnone ssp {
673673
; AVX1-LABEL: ld1_hi0_hi1_8i32:
674674
; AVX1: # %bb.0: # %entry
675-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4]
675+
; AVX1-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,2,3,4]
676676
; AVX1-NEXT: vpaddd 16(%rdi), %xmm1, %xmm2
677677
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
678678
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0

llvm/test/CodeGen/X86/avx2-arith.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ define <8 x i16> @mul_const8(<8 x i16> %x) {
234234
define <8 x i32> @mul_const9(<8 x i32> %x) {
235235
; CHECK-LABEL: mul_const9:
236236
; CHECK: # %bb.0:
237-
; CHECK-NEXT: vmovd {{.*#+}} xmm1 = [2,0,0,0]
237+
; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,0]
238238
; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
239239
; CHECK-NEXT: ret{{[l|q]}}
240240
%y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

0 commit comments

Comments
 (0)