Skip to content

Commit 74d3ba1

Browse files
bwendling authored and tstellar committed
[X86] Don't zero out %eax if both %al and %ah are used
The iterator over super and sub registers doesn't include both 8-bit registers in its list. So if both registers are used and only one of them is live on return, then we need to make sure that the other 8-bit register is also marked as live and not zeroed out. Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D139679 (cherry picked from commit 14d4cdd)
1 parent a8af9f6 commit 74d3ba1

File tree

2 files changed

+119
-1
lines changed

2 files changed

+119
-1
lines changed

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1237,7 +1237,13 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
12371237
if (!MO.isReg())
12381238
continue;
12391239

1240-
for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg()))
1240+
MCRegister Reg = MO.getReg();
1241+
1242+
// This picks up sibling registers (e.g. %al -> %ah).
1243+
for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit)
1244+
RegsToZero.reset(*Unit);
1245+
1246+
for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
12411247
RegsToZero.reset(SReg);
12421248
}
12431249
}
Lines changed: 112 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,112 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -opaque-pointers | FileCheck %s --check-prefix=I386
3+
;
4+
; Make sure we don't zero out %eax when both %ah and %al are used.
5+
;
6+
; PR1766: https://github.com/ClangBuiltLinux/linux/issues/1766
7+
8+
%struct.maple_subtree_state = type { ptr }
9+
10+
@mas_data_end_type = dso_local local_unnamed_addr global i32 0, align 4
11+
@ma_meta_end_mn_0_0_0_0_0_0 = dso_local local_unnamed_addr global i8 0, align 1
12+
@mt_pivots_0 = dso_local local_unnamed_addr global i8 0, align 1
13+
@mas_data_end___trans_tmp_2 = dso_local local_unnamed_addr global ptr null, align 4
14+
@mt_slots_0 = dso_local local_unnamed_addr global i8 0, align 1
15+
16+
define dso_local zeroext i1 @test1(ptr nocapture noundef readonly %0) local_unnamed_addr "zero-call-used-regs"="used-gpr" nounwind {
17+
; I386-LABEL: test1:
18+
; I386: # %bb.0:
19+
; I386-NEXT: pushl %ebx
20+
; I386-NEXT: subl $24, %esp
21+
; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
22+
; I386-NEXT: movl (%eax), %eax
23+
; I386-NEXT: movzbl (%eax), %ebx
24+
; I386-NEXT: calll bar
25+
; I386-NEXT: testb %al, %al
26+
; I386-NEXT: # implicit-def: $al
27+
; I386-NEXT: # kill: killed $al
28+
; I386-NEXT: je .LBB0_6
29+
; I386-NEXT: # %bb.1:
30+
; I386-NEXT: cmpl $0, mas_data_end_type
31+
; I386-NEXT: je .LBB0_3
32+
; I386-NEXT: # %bb.2:
33+
; I386-NEXT: movzbl ma_meta_end_mn_0_0_0_0_0_0, %eax
34+
; I386-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
35+
; I386-NEXT: jmp .LBB0_6
36+
; I386-NEXT: .LBB0_3:
37+
; I386-NEXT: movb mt_pivots_0, %ah
38+
; I386-NEXT: movb %ah, %al
39+
; I386-NEXT: decb %al
40+
; I386-NEXT: movl mas_data_end___trans_tmp_2, %ecx
41+
; I386-NEXT: movsbl %al, %edx
42+
; I386-NEXT: cmpl $0, (%ecx,%edx,4)
43+
; I386-NEXT: je .LBB0_5
44+
; I386-NEXT: # %bb.4:
45+
; I386-NEXT: movb %al, %ah
46+
; I386-NEXT: .LBB0_5:
47+
; I386-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
48+
; I386-NEXT: .LBB0_6:
49+
; I386-NEXT: movb mt_slots_0, %bh
50+
; I386-NEXT: leal {{[0-9]+}}(%esp), %eax
51+
; I386-NEXT: movl %eax, (%esp)
52+
; I386-NEXT: calll baz
53+
; I386-NEXT: subl $4, %esp
54+
; I386-NEXT: cmpb %bh, %bl
55+
; I386-NEXT: jae .LBB0_8
56+
; I386-NEXT: # %bb.7:
57+
; I386-NEXT: movsbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
58+
; I386-NEXT: movl %eax, (%esp)
59+
; I386-NEXT: calll gaz
60+
; I386-NEXT: .LBB0_8:
61+
; I386-NEXT: movb $1, %al
62+
; I386-NEXT: addl $24, %esp
63+
; I386-NEXT: popl %ebx
64+
; I386-NEXT: xorl %ecx, %ecx
65+
; I386-NEXT: xorl %edx, %edx
66+
; I386-NEXT: retl
67+
%2 = alloca %struct.maple_subtree_state, align 4
68+
%3 = load ptr, ptr %0, align 4
69+
%4 = load i8, ptr %3, align 1
70+
%5 = tail call zeroext i1 @bar()
71+
br i1 %5, label %6, label %20
72+
73+
6: ; preds = %1
74+
%7 = load i32, ptr @mas_data_end_type, align 4
75+
%8 = icmp eq i32 %7, 0
76+
br i1 %8, label %11, label %9
77+
78+
9: ; preds = %6
79+
%10 = load i8, ptr @ma_meta_end_mn_0_0_0_0_0_0, align 1
80+
br label %20
81+
82+
11: ; preds = %6
83+
%12 = load i8, ptr @mt_pivots_0, align 1
84+
%13 = add i8 %12, -1
85+
%14 = load ptr, ptr @mas_data_end___trans_tmp_2, align 4
86+
%15 = sext i8 %13 to i32
87+
%16 = getelementptr inbounds [1 x i32], ptr %14, i32 0, i32 %15
88+
%17 = load i32, ptr %16, align 4
89+
%18 = icmp eq i32 %17, 0
90+
%19 = select i1 %18, i8 %12, i8 %13
91+
br label %20
92+
93+
20: ; preds = %11, %9, %1
94+
%21 = phi i8 [ undef, %1 ], [ %10, %9 ], [ %19, %11 ]
95+
%22 = load i8, ptr @mt_slots_0, align 1
96+
call void @baz(ptr nonnull sret(%struct.maple_subtree_state) align 4 %2)
97+
%23 = icmp ult i8 %4, %22
98+
br i1 %23, label %24, label %25
99+
100+
24: ; preds = %20
101+
call void @gaz(i8 noundef signext %21)
102+
br label %25
103+
104+
25: ; preds = %20, %24
105+
ret i1 true
106+
}
107+
108+
declare dso_local zeroext i1 @bar(...) local_unnamed_addr
109+
110+
declare dso_local void @baz(ptr sret(%struct.maple_subtree_state) align 4, ...) local_unnamed_addr
111+
112+
declare dso_local void @gaz(i8 noundef signext) local_unnamed_addr

0 commit comments

Comments
 (0)