Commit 390efcf

x86_64: remove pointless jump to epilogue
1 parent d1efa5e commit 390efcf
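
In summary, the change renames exitlude_jump_relocs to epilogue_relocs and emits the shared epilogue only when at least one return actually jumps to it. In addition, when the last recorded jump is also the last MIR instruction, that jump is popped instead of relocated, since it would only skip to the epilogue that immediately follows it; a jump that is not at the end of the code (the case the removed TODO warned about) is still relocated as before. Below is a minimal sketch of that elision idea, using hypothetical names and a plain instruction count rather than the real CodeGen/Mir API:

const std = @import("std");

/// Hypothetical, simplified model of the elision performed in gen():
/// `relocs` holds the indices of jump instructions that target the epilogue,
/// `inst_count` is the number of MIR instructions emitted so far.
/// Returns the relocs that still need to be patched to point at the epilogue.
fn elideTrailingEpilogueJump(relocs: []const u32, inst_count: *u32) []const u32 {
    if (relocs.len == 0) return relocs; // nothing jumps to the epilogue
    const last = relocs.len - 1;
    if (relocs[last] == inst_count.* - 1) {
        // The final jump is the last emitted instruction, so it would only
        // skip to the epilogue immediately following it; drop it instead.
        inst_count.* -= 1;
        return relocs[0..last];
    }
    return relocs;
}

test elideTrailingEpilogueJump {
    var count: u32 = 10;
    const relocs = [_]u32{ 3, 9 }; // the jump at index 9 is the final instruction
    const remaining = elideTrailingEpilogueJump(&relocs, &count);
    try std.testing.expectEqual(@as(usize, 1), remaining.len);
    try std.testing.expectEqual(@as(u32, 9), count);
}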

1 file changed: src/arch/x86_64/CodeGen.zig (34 additions, 34 deletions)

@@ -74,7 +74,7 @@ end_di_column: u32,
 /// The value is an offset into the `Function` `code` from the beginning.
 /// To perform the reloc, write 32-bit signed little-endian integer
 /// which is a relative jump, based on the address following the reloc.
-exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
+epilogue_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,

 reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined,
 const_tracking: ConstTrackingMap = .empty,
@@ -930,7 +930,7 @@ pub fn generate(
         function.blocks.deinit(gpa);
         function.inst_tracking.deinit(gpa);
         function.const_tracking.deinit(gpa);
-        function.exitlude_jump_relocs.deinit(gpa);
+        function.epilogue_relocs.deinit(gpa);
         function.mir_instructions.deinit(gpa);
         function.mir_extra.deinit(gpa);
         function.mir_table.deinit(gpa);
@@ -2250,24 +2250,24 @@ fn gen(self: *CodeGen) InnerError!void {

         try self.genBody(self.air.getMainBody());

-        // TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
-        // Example:
-        // pub fn main() void {
-        //     maybeErr() catch return;
-        //     unreachable;
-        // }
-        // Eliding the reloc will cause a miscompilation in this case.
-        for (self.exitlude_jump_relocs.items) |jmp_reloc| {
-            self.mir_instructions.items(.data)[jmp_reloc].inst.inst =
-                @intCast(self.mir_instructions.len);
-        }
-
-        try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
-        const backpatch_stack_dealloc = try self.asmPlaceholder();
-        const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
-        try self.asmRegister(.{ ._, .pop }, .rbp);
-        try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8));
-        try self.asmOpOnly(.{ ._, .ret });
+        const epilogue = if (self.epilogue_relocs.items.len > 0) epilogue: {
+            const epilogue_relocs_last_index = self.epilogue_relocs.items.len - 1;
+            for (if (self.epilogue_relocs.items[epilogue_relocs_last_index] == self.mir_instructions.len - 1) epilogue_relocs: {
+                _ = self.mir_instructions.pop();
+                break :epilogue_relocs self.epilogue_relocs.items[0..epilogue_relocs_last_index];
+            } else self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc);
+
+            try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
+            const backpatch_stack_dealloc = try self.asmPlaceholder();
+            const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
+            try self.asmRegister(.{ ._, .pop }, .rbp);
+            try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8));
+            try self.asmOpOnly(.{ ._, .ret });
+            break :epilogue .{
+                .backpatch_stack_dealloc = backpatch_stack_dealloc,
+                .backpatch_pop_callee_preserved_regs = backpatch_pop_callee_preserved_regs,
+            };
+        } else null;

         const frame_layout = try self.computeFrameLayout(fn_info.cc);
         const need_frame_align = frame_layout.stack_mask != std.math.maxInt(u32);
@@ -2337,8 +2337,8 @@ fn gen(self: *CodeGen) InnerError!void {
                 });
             }
         }
-        if (need_frame_align or need_stack_adjust) {
-            self.mir_instructions.set(backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) {
+        if (epilogue) |e| if (need_frame_align or need_stack_adjust) {
+            self.mir_instructions.set(e.backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) {
                0 => .{
                    .tag = .mov,
                    .ops = .rr,
@@ -2362,14 +2362,14 @@ fn gen(self: *CodeGen) InnerError!void {
                    } },
                },
            });
-        }
+        };
         if (need_save_reg) {
            self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
                .tag = .pseudo,
                .ops = .pseudo_push_reg_list,
                .data = .{ .reg_list = frame_layout.save_reg_list },
            });
-            self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{
+            if (epilogue) |e| self.mir_instructions.set(e.backpatch_pop_callee_preserved_regs, .{
                .tag = .pseudo,
                .ops = .pseudo_pop_reg_list,
                .data = .{ .reg_list = frame_layout.save_reg_list },
@@ -10064,8 +10064,8 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
            const ret_reg = param_regs[0];
            const enum_mcv = MCValue{ .register = param_regs[1] };

-            const exitlude_jump_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu));
-            defer self.gpa.free(exitlude_jump_relocs);
+            const epilogue_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu));
+            defer self.gpa.free(epilogue_relocs);

            const data_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
            const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
@@ -10074,7 +10074,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {

            var data_off: i32 = 0;
            const tag_names = enum_ty.enumFields(zcu);
-            for (exitlude_jump_relocs, 0..) |*exitlude_jump_reloc, tag_index| {
+            for (epilogue_relocs, 0..) |*epilogue_reloc, tag_index| {
                const tag_name_len = tag_names.get(ip)[tag_index].length(ip);
                const tag_val = try pt.enumValueFieldIndex(enum_ty, @intCast(tag_index));
                const tag_mcv = try self.genTypedValue(tag_val);
@@ -10090,15 +10090,15 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
                );
                try self.genSetMem(.{ .reg = ret_reg }, 8, .usize, .{ .immediate = tag_name_len }, .{});

-                exitlude_jump_reloc.* = try self.asmJmpReloc(undefined);
+                epilogue_reloc.* = try self.asmJmpReloc(undefined);
                self.performReloc(skip_reloc);

                data_off += @intCast(tag_name_len + 1);
            }

            try self.asmOpOnly(.{ ._, .ud2 });

-            for (exitlude_jump_relocs) |reloc| self.performReloc(reloc);
+            for (epilogue_relocs) |reloc| self.performReloc(reloc);
            try self.asmOpOnly(.{ ._, .ret });
        },
        else => return self.fail(
@@ -20171,7 +20171,7 @@ fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
    // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
    // which is available if the jump is 127 bytes or less forward.
    const jmp_reloc = try self.asmJmpReloc(undefined);
-    try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
+    try self.epilogue_relocs.append(self.gpa, jmp_reloc);
 }

 fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
@@ -20191,7 +20191,7 @@ fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
    // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
    // which is available if the jump is 127 bytes or less forward.
    const jmp_reloc = try self.asmJmpReloc(undefined);
-    try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
+    try self.epilogue_relocs.append(self.gpa, jmp_reloc);
 }

 fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void {
@@ -24543,7 +24543,7 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
            self.register_manager.lockRegAssumeUnused(dst_regs[0]),
            self.register_manager.lockRegAssumeUnused(dst_regs[1]),
        },
-        else => .{ null, null },
+        else => @splat(null),
    };
    for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);

@@ -24682,7 +24682,7 @@ fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void {
            self.register_manager.lockRegAssumeUnused(dst_regs[0]),
            self.register_manager.lockReg(dst_regs[1]),
        },
-        else => .{ null, null },
+        else => @splat(null),
    };
    for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);

@@ -24693,7 +24693,7 @@ fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void {
            self.register_manager.lockRegAssumeUnused(src_regs[0]),
            self.register_manager.lockRegAssumeUnused(src_regs[1]),
        },
-        else => .{ null, null },
+        else => @splat(null),
    };
    for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);
