@@ -74,7 +74,7 @@ end_di_column: u32,
 /// The value is an offset into the `Function` `code` from the beginning.
 /// To perform the reloc, write 32-bit signed little-endian integer
 /// which is a relative jump, based on the address following the reloc.
-exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
+epilogue_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
 
 reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined,
 const_tracking: ConstTrackingMap = .empty,
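
The doc comment above describes how these relocs are resolved. As a rough sketch (hypothetical helper; the real patching happens during emit, and `code`/`reloc_off` are assumed names):

    const std = @import("std");

    /// Patch a 4-byte rel32 field at `reloc_off` in `code` so that it jumps
    /// to `target`; the displacement is measured from the address immediately
    /// following the reloc, per the comment above.
    fn applyJumpReloc(code: []u8, reloc_off: usize, target: usize) void {
        const next: i64 = @intCast(reloc_off + 4);
        const disp: i32 = @intCast(@as(i64, @intCast(target)) - next);
        std.mem.writeInt(i32, code[reloc_off..][0..4], disp, .little);
    }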
@@ -930,7 +930,7 @@ pub fn generate(
         function.blocks.deinit(gpa);
         function.inst_tracking.deinit(gpa);
         function.const_tracking.deinit(gpa);
-        function.exitlude_jump_relocs.deinit(gpa);
+        function.epilogue_relocs.deinit(gpa);
         function.mir_instructions.deinit(gpa);
         function.mir_extra.deinit(gpa);
         function.mir_table.deinit(gpa);
@@ -2250,24 +2250,24 @@ fn gen(self: *CodeGen) InnerError!void {
 
         try self.genBody(self.air.getMainBody());
 
-        // TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
-        // Example:
-        // pub fn main() void {
-        //     maybeErr() catch return;
-        //     unreachable;
-        // }
-        // Eliding the reloc will cause a miscompilation in this case.
-        for (self.exitlude_jump_relocs.items) |jmp_reloc| {
-            self.mir_instructions.items(.data)[jmp_reloc].inst.inst =
-                @intCast(self.mir_instructions.len);
-        }
-
-        try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
-        const backpatch_stack_dealloc = try self.asmPlaceholder();
-        const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
-        try self.asmRegister(.{ ._, .pop }, .rbp);
-        try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8));
-        try self.asmOpOnly(.{ ._, .ret });
+        const epilogue = if (self.epilogue_relocs.items.len > 0) epilogue: {
+            const epilogue_relocs_last_index = self.epilogue_relocs.items.len - 1;
+            for (if (self.epilogue_relocs.items[epilogue_relocs_last_index] == self.mir_instructions.len - 1) epilogue_relocs: {
+                _ = self.mir_instructions.pop();
+                break :epilogue_relocs self.epilogue_relocs.items[0..epilogue_relocs_last_index];
+            } else self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc);
+
+            try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
+            const backpatch_stack_dealloc = try self.asmPlaceholder();
+            const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
+            try self.asmRegister(.{ ._, .pop }, .rbp);
+            try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8));
+            try self.asmOpOnly(.{ ._, .ret });
+            break :epilogue .{
+                .backpatch_stack_dealloc = backpatch_stack_dealloc,
+                .backpatch_pop_callee_preserved_regs = backpatch_pop_callee_preserved_regs,
+            };
+        } else null;
 
         const frame_layout = try self.computeFrameLayout(fn_info.cc);
         const need_frame_align = frame_layout.stack_mask != std.math.maxInt(u32);
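
This rewrite resolves the removed TODO: a trailing jump whose target is the instruction immediately after it is redundant, so when the last epilogue reloc is also the final MIR instruction it is popped and excluded from relocation, and when no epilogue relocs exist at all the whole epilogue is skipped. A toy sketch of just the elision rule (a plain index list standing in for the real MIR types; the caller has already checked that `relocs` is non-empty, as the diff does):

    const std = @import("std");

    fn relocsToResolve(insts: *std.ArrayListUnmanaged(u32), relocs: []const u32) []const u32 {
        const last = relocs.len - 1;
        if (relocs[last] == insts.items.len - 1) {
            // The jump targets the very next instruction: drop it and
            // let execution fall through into the epilogue.
            _ = insts.pop();
            return relocs[0..last];
        }
        return relocs;
    }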
@@ -2337,8 +2337,8 @@ fn gen(self: *CodeGen) InnerError!void {
                 });
             }
         }
-        if (need_frame_align or need_stack_adjust) {
-            self.mir_instructions.set(backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) {
+        if (epilogue) |e| if (need_frame_align or need_stack_adjust) {
+            self.mir_instructions.set(e.backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) {
                 0 => .{
                     .tag = .mov,
                     .ops = .rr,
@@ -2362,14 +2362,14 @@ fn gen(self: *CodeGen) InnerError!void {
                     } },
                 },
             });
-        }
+        };
         if (need_save_reg) {
             self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
                 .tag = .pseudo,
                 .ops = .pseudo_push_reg_list,
                 .data = .{ .reg_list = frame_layout.save_reg_list },
             });
-            self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{
+            if (epilogue) |e| self.mir_instructions.set(e.backpatch_pop_callee_preserved_regs, .{
                 .tag = .pseudo,
                 .ops = .pseudo_pop_reg_list,
                 .data = .{ .reg_list = frame_layout.save_reg_list },
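
For context, `backpatch_stack_dealloc` and `backpatch_pop_callee_preserved_regs` index placeholder instructions emitted by `asmPlaceholder` before the frame layout is known; after `computeFrameLayout` they are overwritten in place. With the change above, the epilogue placeholders only exist, and are only patched, when an epilogue was actually emitted. The idiom, reduced to a toy `Inst` (not the real `Mir.Inst`):

    const Inst = struct { tag: enum { placeholder, mov, pseudo } = .placeholder };

    // Reserve a slot now, fill it in once the final layout is computed.
    fn backpatch(insts: []Inst, index: usize, final: Inst) void {
        insts[index] = final;
    }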
@@ -10064,8 +10064,8 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
             const ret_reg = param_regs[0];
             const enum_mcv = MCValue{ .register = param_regs[1] };
 
-            const exitlude_jump_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu));
-            defer self.gpa.free(exitlude_jump_relocs);
+            const epilogue_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu));
+            defer self.gpa.free(epilogue_relocs);
 
             const data_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
             const data_lock = self.register_manager.lockRegAssumeUnused(data_reg);
@@ -10074,7 +10074,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
 
             var data_off: i32 = 0;
             const tag_names = enum_ty.enumFields(zcu);
-            for (exitlude_jump_relocs, 0..) |*exitlude_jump_reloc, tag_index| {
+            for (epilogue_relocs, 0..) |*epilogue_reloc, tag_index| {
                 const tag_name_len = tag_names.get(ip)[tag_index].length(ip);
                 const tag_val = try pt.enumValueFieldIndex(enum_ty, @intCast(tag_index));
                 const tag_mcv = try self.genTypedValue(tag_val);
@@ -10090,15 +10090,15 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void {
                 );
                 try self.genSetMem(.{ .reg = ret_reg }, 8, .usize, .{ .immediate = tag_name_len }, .{});
 
-                exitlude_jump_reloc.* = try self.asmJmpReloc(undefined);
+                epilogue_reloc.* = try self.asmJmpReloc(undefined);
                 self.performReloc(skip_reloc);
 
                 data_off += @intCast(tag_name_len + 1);
             }
 
             try self.asmOpOnly(.{ ._, .ud2 });
 
-            for (exitlude_jump_relocs) |reloc| self.performReloc(reloc);
+            for (epilogue_relocs) |reloc| self.performReloc(reloc);
             try self.asmOpOnly(.{ ._, .ret });
         },
         else => return self.fail(
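
The loop above emits one compare-and-skip block per enum field; each block ends with a jump, collected in `epilogue_relocs`, to a shared `ret`, and the `ud2` traps if no tag matched. Sketched shape of the output (assumed layout, not verbatim emitted code):

    // per field:   cmp  tag_reg, field_value
    //              jne  next_field          // skip_reloc
    //              ...  store name ptr/len through ret_reg
    //              jmp  done                // recorded in epilogue_relocs
    // after loop:  ud2                      // invalid tag value
    // done:        ret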
@@ -20171,7 +20171,7 @@ fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
     // which is available if the jump is 127 bytes or less forward.
     const jmp_reloc = try self.asmJmpReloc(undefined);
-    try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
+    try self.epilogue_relocs.append(self.gpa, jmp_reloc);
 }
 
 fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
@@ -20191,7 +20191,7 @@ fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void {
     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
     // which is available if the jump is 127 bytes or less forward.
     const jmp_reloc = try self.asmJmpReloc(undefined);
-    try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc);
+    try self.epilogue_relocs.append(self.gpa, jmp_reloc);
 }
 
 fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void {
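
Both TODOs above refer to the x86 encoding split: `jmp rel32` (opcode 0xE9) occupies 5 bytes, while `jmp rel8` (0xEB) occupies 2 and becomes usable once the forward displacement is known to fit in a signed byte. A minimal feasibility check (hypothetical helper, not part of this codebase):

    const std = @import("std");

    fn fitsRel8(disp: i64) bool {
        // rel8 is a signed byte measured from the end of the 2-byte jump.
        return disp >= std.math.minInt(i8) and disp <= std.math.maxInt(i8);
    }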
@@ -24543,7 +24543,7 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
                 self.register_manager.lockRegAssumeUnused(dst_regs[0]),
                 self.register_manager.lockRegAssumeUnused(dst_regs[1]),
             },
-            else => .{ null, null },
+            else => @splat(null),
         };
         for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
 
@@ -24682,7 +24682,7 @@ fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void {
             self.register_manager.lockRegAssumeUnused(dst_regs[0]),
             self.register_manager.lockReg(dst_regs[1]),
         },
-        else => .{ null, null },
+        else => @splat(null),
     };
     for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock);
 
@@ -24693,7 +24693,7 @@ fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void {
             self.register_manager.lockRegAssumeUnused(src_regs[0]),
             self.register_manager.lockRegAssumeUnused(src_regs[1]),
         },
-        else => .{ null, null },
+        else => @splat(null),
     };
     for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock);
 