From 9a7d28fe58c0dbd34b0e41fe0e4fed6d57c7ad45 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Thu, 30 Oct 2025 09:11:36 +0000 Subject: [PATCH 1/8] Legalize: rewrite several legalizations The main goal of this change was to avoid emitting the `vector_store_elem` AIR tag, because this represents an operation which Zig no longer supports (and hence Sema no longer emits) as of 010d9a6 (because runtime vector indices are now forbidden). Backends should not need to lower this operation, so I rewrote the legalizations which emitted it (scalarizations of vector operations) to instead unroll the loop and hence emit comptime-known vector indices. In doing this, I actually reworked those legalizations to use a different strategy; instead of using an `alloc` and storing to individual vector elements, the vector is constructed by-val, for instance by performing the scalar operation on all elements and passing them to an `aggregate_init`. This is vastly simpler to implement in Legalize, conceptually simpler, and doesn't severely pessimise memory usage, because a non-optimizing backend will store the full vector on the stack either way. Given the above rationale, I also ended up reworking several other legalizations to use simpler lowerings. The legalizations in question were bitcast scalarization, `struct_field_val` of `packed struct`s (where we just bitcast to an integer and perform the appropriate shift/trunc sequence), and `aggregate_init` of a `packed struct` (also implemented in terms of integer bitwise operations with bitcasts to and from the actual types). This hugely simplified some parts of `Legalize`. So, `Legalize` is now much simpler, and the `vector_store_elem` instruction is no longer emitted by any part of the compiler so can be removed in a future commit. --- src/Air/Legalize.zig | 1863 +++++++++++------------------------------- 1 file changed, 480 insertions(+), 1383 deletions(-) diff --git a/src/Air/Legalize.zig b/src/Air/Legalize.zig index 46c96b4472c3..a26ed8996473 100644 --- a/src/Air/Legalize.zig +++ b/src/Air/Legalize.zig @@ -14,7 +14,7 @@ features: if (switch (dev.env) { return comptime bootstrap_features.contains(feature); } /// `inline` to propagate comptime-known result. - fn hasAny(_: @This(), comptime features: []const Feature) bool { + inline fn hasAny(_: @This(), comptime features: []const Feature) bool { return comptime !bootstrap_features.intersectWith(.initMany(features)).eql(.initEmpty()); } } else struct { @@ -154,9 +154,9 @@ pub const Feature = enum { /// Currently assumes little endian and a specific integer layout where the lsb of every integer is the lsb of the /// first byte of memory until bit pointers know their backing type. expand_packed_store, - /// Replace `struct_field_val` of a packed field with a `store` and packed `load`. + /// Replace `struct_field_val` of a packed field with a `bitcast` to integer, `shr`, `trunc`, and `bitcast` to field type. expand_packed_struct_field_val, - /// Replace `aggregate_init` of a packed aggregate with a series a packed `store`s followed by a `load`. + /// Replace `aggregate_init` of a packed struct with a sequence of `shl_exact`, `bitcast`, `intcast`, and `bit_or`. 
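+    /// For example, given a hypothetical `S = packed struct(u8) { a: u3, b: u5 }`, an init of `S`
+    /// lowers to roughly `@bitCast((@as(u8, b_bits) << 3) | a_bits)`, with each `*_bits` being the
+    /// field value bitcast to its backing unsigned integer and widened with `intcast`.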
expand_packed_aggregate_init, fn scalarize(tag: Air.Inst.Tag) Feature { @@ -409,40 +409,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); }, .bitcast => if (l.features.has(.scalarize_bitcast)) { - const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const to_ty = ty_op.ty.toType(); - const to_ty_tag = to_ty.zigTypeTag(zcu); - const to_ty_legal = legal: switch (to_ty_tag) { - else => true, - .array, .vector => { - if (to_ty.arrayLen(zcu) == 1) break :legal true; - const to_elem_ty = to_ty.childType(zcu); - break :legal to_elem_ty.bitSize(zcu) == 8 * to_elem_ty.abiSize(zcu); - }, - }; - - const from_ty = l.typeOf(ty_op.operand); - const from_ty_legal = legal: switch (from_ty.zigTypeTag(zcu)) { - else => true, - .array, .vector => { - if (from_ty.arrayLen(zcu) == 1) break :legal true; - const from_elem_ty = from_ty.childType(zcu); - break :legal from_elem_ty.bitSize(zcu) == 8 * from_elem_ty.abiSize(zcu); - }, - }; - - if (!to_ty_legal and !from_ty_legal and to_ty.arrayLen(zcu) == from_ty.arrayLen(zcu)) switch (to_ty_tag) { - else => unreachable, - .array => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastToArrayBlockPayload(inst)), - .vector => continue :inst try l.scalarize(inst, .bitcast), - }; - if (!to_ty_legal) switch (to_ty_tag) { - else => unreachable, - .array => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastResultArrayBlockPayload(inst)), - .vector => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastResultVectorBlockPayload(inst)), - }; - if (!from_ty_legal) continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastOperandBlockPayload(inst)); + if (try l.scalarizeBitcastBlockPayload(inst)) |payload| { + continue :inst l.replaceInst(inst, .block, payload); + } }, .intcast_safe => if (l.features.has(.expand_intcast_safe)) { assert(!l.features.has(.scalarize_intcast_safe)); // it doesn't make sense to do both @@ -570,13 +539,17 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .load => if (l.features.has(.expand_packed_load)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; const ptr_info = l.typeOf(ty_op.operand).ptrInfo(zcu); - if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) continue :inst l.replaceInst(inst, .block, try l.packedLoadBlockPayload(inst)); + if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { + continue :inst l.replaceInst(inst, .block, try l.packedLoadBlockPayload(inst)); + } }, .ret, .ret_safe, .ret_load => {}, .store, .store_safe => if (l.features.has(.expand_packed_store)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; const ptr_info = l.typeOf(bin_op.lhs).ptrInfo(zcu); - if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) continue :inst l.replaceInst(inst, .block, try l.packedStoreBlockPayload(inst)); + if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { + continue :inst l.replaceInst(inst, .block, try l.packedStoreBlockPayload(inst)); + } }, .unreach, .optional_payload, @@ -624,7 +597,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { switch (vector_ty.vectorLen(zcu)) { 0 => unreachable, 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{ - .ty = Air.internedToRef(vector_ty.childType(zcu).toIntern()), + .ty 
= .fromType(vector_ty.childType(zcu)), .operand = reduce.operand, } }), else => {}, @@ -666,9 +639,18 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { const agg_ty = ty_pl.ty.toType(); switch (agg_ty.zigTypeTag(zcu)) { else => {}, - .@"struct", .@"union" => switch (agg_ty.containerLayout(zcu)) { + .@"union" => unreachable, + .@"struct" => switch (agg_ty.containerLayout(zcu)) { .auto, .@"extern" => {}, - .@"packed" => continue :inst l.replaceInst(inst, .block, try l.packedAggregateInitBlockPayload(inst)), + .@"packed" => switch (agg_ty.structFieldCount(zcu)) { + 0 => unreachable, + // An `aggregate_init` of a packed struct with 1 field is just a fancy bitcast. + 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{ + .ty = .fromType(agg_ty), + .operand = @enumFromInt(l.air_extra.items[ty_pl.payload]), + } }), + else => continue :inst l.replaceInst(inst, .block, try l.packedAggregateInitBlockPayload(inst)), + }, }, } }, @@ -685,7 +667,6 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .set_err_return_trace, .addrspace_cast, .save_err_return_trace_index, - .vector_store_elem, .runtime_nav_ptr, .c_va_arg, .c_va_copy, @@ -699,7 +680,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { } } -const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, bitcast, cmp_vector, shuffle_one, shuffle_two, select }; +const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, shuffle_one, shuffle_two, select }; /// inline to propagate comptime-known `replaceInst` result. inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Tag { return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, form)); @@ -707,1160 +688,420 @@ inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: Scal fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; + const gpa = zcu.gpa; const orig = l.air_instructions.get(@intFromEnum(orig_inst)); const res_ty = l.typeOfIndex(orig_inst); const res_len = res_ty.vectorLen(zcu); - const extra_insts = switch (form) { - .un_op, .ty_op, .bitcast => 1, - .bin_op, .cmp_vector => 2, - .pl_op_bin => 3, - .shuffle_one, .shuffle_two => 13, - .select => 6, + const inst_per_elem = switch (form) { + .un_op, .ty_op => 2, + .bin_op, .cmp_vector => 3, + .pl_op_bin => 4, + .shuffle_one, .shuffle_two => 1, + .select => 7, }; - var inst_buf: [5 + extra_insts + 9]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); - var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + var sfba_state = std.heap.stackFallback(@sizeOf([inst_per_elem * 32 + 2]Air.Inst.Index) + @sizeOf([32]Air.Inst.Ref), gpa); + const sfba = sfba_state.get(); - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - _ = loop.block.add(l, 
.{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = res_alloc_inst.toRef(), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = cur_index_inst.toRef(), - .rhs = res_elem: switch (form) { - .un_op => loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .un_op = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.un_op, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef() }, - }).toRef(), - .ty_op => loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.childType(zcu).toIntern()), - .operand = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.ty_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - .bin_op => loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.bin_op.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.bin_op.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - .pl_op_bin => { - const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data; - break :res_elem loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .pl_op = .{ - .payload = try l.addExtra(Air.Bin, .{ - .lhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - }), - .operand = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.pl_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(); - }, - .bitcast => loop.block.addBitCast(l, res_ty.childType(zcu), loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.ty_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef()), - .cmp_vector => { - const extra = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data; - break :res_elem (try loop.block.addCmp( - l, - extra.compareOperator(), - loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .{ .optimized = switch (orig.tag) { - else => unreachable, - .cmp_vector => false, - .cmp_vector_optimized => true, - } }, - )).toRef(); - }, - .shuffle_one, .shuffle_two => { - const ip = &zcu.intern_pool; - const unwrapped = switch (form) { - else => comptime unreachable, - .shuffle_one => l.getTmpAir().unwrapShuffleOne(zcu, orig_inst), - .shuffle_two => l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst), - }; - const operand_a = switch (form) { - else => comptime unreachable, - .shuffle_one => unwrapped.operand, - .shuffle_two => unwrapped.operand_a, - }; - const operand_a_len = l.typeOf(operand_a).vectorLen(zcu); - const elem_ty = res_ty.childType(zcu); - var res_elem: Result = .init(l, elem_ty, &loop.block); - res_elem.block = .init(loop.block.stealCapacity(extra_insts)); - { - const ExpectedContents = extern struct { - mask_elems: [128]InternPool.Index, - ct_elems: switch (form) { - else => 
unreachable, - .shuffle_one => extern struct { - keys: [152]InternPool.Index, - header: u8 align(@alignOf(u32)), - index: [256][2]u8, - }, - .shuffle_two => void, - }, - }; - var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = - std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa); - const gpa = stack.get(); + // Plus 2 extra instructions for `aggregate_init` and `br`. + const inst_buf = try sfba.alloc(Air.Inst.Index, inst_per_elem * res_len + 2); + defer sfba.free(inst_buf); - const mask_elems = try gpa.alloc(InternPool.Index, res_len); - defer gpa.free(mask_elems); - - var ct_elems: switch (form) { - else => unreachable, - .shuffle_one => std.AutoArrayHashMapUnmanaged(InternPool.Index, void), - .shuffle_two => struct { - const empty: @This() = .{}; - inline fn deinit(_: @This(), _: std.mem.Allocator) void {} - inline fn ensureTotalCapacity(_: @This(), _: std.mem.Allocator, _: usize) error{}!void {} - }, - } = .empty; - defer ct_elems.deinit(gpa); - try ct_elems.ensureTotalCapacity(gpa, res_len); - - const mask_elem_ty = try pt.intType(.signed, 1 + Type.smallestUnsignedBits(@max(operand_a_len, switch (form) { - else => comptime unreachable, - .shuffle_one => res_len, - .shuffle_two => l.typeOf(unwrapped.operand_b).vectorLen(zcu), - }))); - for (mask_elems, unwrapped.mask) |*mask_elem_val, mask_elem| mask_elem_val.* = (try pt.intValue(mask_elem_ty, switch (form) { - else => comptime unreachable, - .shuffle_one => switch (mask_elem.unwrap()) { - .elem => |index| index, - .value => |elem_val| if (ip.isUndef(elem_val)) - operand_a_len - else - ~@as(i33, @intCast((ct_elems.getOrPutAssumeCapacity(elem_val)).index)), - }, - .shuffle_two => switch (mask_elem.unwrap()) { - .a_elem => |a_index| a_index, - .b_elem => |b_index| ~@as(i33, b_index), - .undef => operand_a_len, - }, - })).toIntern(); - const mask_ty = try pt.arrayType(.{ - .len = res_len, - .child = mask_elem_ty.toIntern(), - }); - const mask_elem_inst = res_elem.block.add(l, .{ - .tag = .ptr_elem_val, - .data = .{ .bin_op = .{ - .lhs = Air.internedToRef(try pt.intern(.{ .ptr = .{ - .ty = (try pt.manyConstPtrType(mask_elem_ty)).toIntern(), - .base_addr = .{ .uav = .{ - .val = (try pt.aggregateValue(mask_ty, mask_elems)).toIntern(), - .orig_ty = (try pt.singleConstPtrType(mask_ty)).toIntern(), - } }, - .byte_offset = 0, - } })), - .rhs = cur_index_inst.toRef(), - } }, - }); - var def_cond_br: CondBr = .init(l, (try res_elem.block.addCmp( - l, - .lt, - mask_elem_inst.toRef(), - try pt.intRef(mask_elem_ty, operand_a_len), - .{}, - )).toRef(), &res_elem.block, .{}); - def_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity()); - { - const operand_b_used = switch (form) { - else => comptime unreachable, - .shuffle_one => ct_elems.count() > 0, - .shuffle_two => true, - }; - var operand_cond_br: CondBr = undefined; - operand_cond_br.then_block = if (operand_b_used) then_block: { - operand_cond_br = .init(l, (try def_cond_br.then_block.addCmp( - l, - .gte, - mask_elem_inst.toRef(), - try pt.intRef(mask_elem_ty, 0), - .{}, - )).toRef(), &def_cond_br.then_block, .{}); - break :then_block .init(def_cond_br.then_block.stealRemainingCapacity()); - } else def_cond_br.then_block; - _ = operand_cond_br.then_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = operand_cond_br.then_block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = operand_a, - .rhs = operand_cond_br.then_block.add(l, .{ - .tag = .intcast, - .data = .{ 
.ty_op = .{ - .ty = .usize_type, - .operand = mask_elem_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - } }, - }); - if (operand_b_used) { - operand_cond_br.else_block = .init(operand_cond_br.then_block.stealRemainingCapacity()); - _ = operand_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = if (switch (form) { - else => comptime unreachable, - .shuffle_one => ct_elems.count() > 1, - .shuffle_two => true, - }) operand_cond_br.else_block.add(l, .{ - .tag = switch (form) { - else => comptime unreachable, - .shuffle_one => .ptr_elem_val, - .shuffle_two => .array_elem_val, - }, - .data = .{ .bin_op = .{ - .lhs = operand_b: switch (form) { - else => comptime unreachable, - .shuffle_one => { - const ct_elems_ty = try pt.arrayType(.{ - .len = ct_elems.count(), - .child = elem_ty.toIntern(), - }); - break :operand_b Air.internedToRef(try pt.intern(.{ .ptr = .{ - .ty = (try pt.manyConstPtrType(elem_ty)).toIntern(), - .base_addr = .{ .uav = .{ - .val = (try pt.aggregateValue(ct_elems_ty, ct_elems.keys())).toIntern(), - .orig_ty = (try pt.singleConstPtrType(ct_elems_ty)).toIntern(), - } }, - .byte_offset = 0, - } })); - }, - .shuffle_two => unwrapped.operand_b, - }, - .rhs = operand_cond_br.else_block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = operand_cond_br.else_block.add(l, .{ - .tag = .not, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(mask_elem_ty.toIntern()), - .operand = mask_elem_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - } }, - }).toRef() else res_elem_br: { - _ = operand_cond_br.else_block.stealCapacity(3); - break :res_elem_br Air.internedToRef(ct_elems.keys()[0]); - }, - } }, - }); - def_cond_br.else_block = .init(operand_cond_br.else_block.stealRemainingCapacity()); - try operand_cond_br.finish(l); - } else { - def_cond_br.then_block = operand_cond_br.then_block; - _ = def_cond_br.then_block.stealCapacity(6); - def_cond_br.else_block = .init(def_cond_br.then_block.stealRemainingCapacity()); - } - } - _ = def_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = try pt.undefRef(elem_ty), - } }, - }); - try def_cond_br.finish(l); - } - try res_elem.finish(l); - break :res_elem res_elem.inst.toRef(); - }, - .select => { - const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data; - var res_elem: Result = .init(l, l.typeOf(extra.lhs).childType(zcu), &loop.block); - res_elem.block = .init(loop.block.stealCapacity(extra_insts)); - { - var select_cond_br: CondBr = .init(l, res_elem.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.pl_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), &res_elem.block, .{}); - select_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity()); - _ = select_cond_br.then_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = select_cond_br.then_block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }); - select_cond_br.else_block = .init(select_cond_br.then_block.stealRemainingCapacity()); - _ = select_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = select_cond_br.else_block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - 
}); - try select_cond_br.finish(l); - } - try res_elem.finish(l); - break :res_elem res_elem.inst.toRef(); - }, - }, - }), - } }, - }); + var main_block: Block = .init(inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_len - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), + const elem_buf = try sfba.alloc(Air.Inst.Ref, res_len); + defer sfba.free(elem_buf); + + switch (form) { + .un_op => { + const orig_operand = orig.data.un_op; + const un_op_tag = orig.tag; + for (elem_buf, 0..) |*elem, elem_idx| { + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef(); + elem.* = main_block.addUnOp(l, un_op_tag, operand).toRef(); + } + }, + .ty_op => { + const orig_operand = orig.data.ty_op.operand; + const orig_ty: Type = .fromInterned(orig.data.ty_op.ty.toInterned().?); + const scalar_ty = orig_ty.childType(zcu); + const ty_op_tag = orig.tag; + for (elem_buf, 0..) |*elem, elem_idx| { + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef(); + elem.* = main_block.addTyOp(l, ty_op_tag, scalar_ty, operand).toRef(); + } + }, + .bin_op => { + const orig_operands = orig.data.bin_op; + const bin_op_tag = orig.tag; + for (elem_buf, 0..) |*elem, elem_idx| { + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const lhs = main_block.addBinOp(l, .array_elem_val, orig_operands.lhs, elem_idx_ref).toRef(); + const rhs = main_block.addBinOp(l, .array_elem_val, orig_operands.rhs, elem_idx_ref).toRef(); + elem.* = main_block.addBinOp(l, bin_op_tag, lhs, rhs).toRef(); + } + }, + .pl_op_bin => { + const orig_operand = orig.data.pl_op.operand; + const orig_payload = l.extraData(Air.Bin, orig.data.pl_op.payload).data; + const pl_op_tag = orig.tag; + for (elem_buf, 0..) 
|*elem, elem_idx| { + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef(); + const lhs = main_block.addBinOp(l, .array_elem_val, orig_payload.lhs, elem_idx_ref).toRef(); + const rhs = main_block.addBinOp(l, .array_elem_val, orig_payload.rhs, elem_idx_ref).toRef(); + elem.* = main_block.add(l, .{ + .tag = pl_op_tag, + .data = .{ .pl_op = .{ + .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), + .operand = operand, } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); + }).toRef(); } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try loop_cond_br.finish(l); - } - try loop.finish(l); - } - return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), - } }; -} -fn scalarizeBitcastToArrayBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { - const pt = l.pt; - const zcu = pt.zcu; + }, + .cmp_vector => { + const orig_payload = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data; + const cmp_op = orig_payload.compareOperator(); + const optimized = switch (orig.tag) { + .cmp_vector => false, + .cmp_vector_optimized => true, + else => unreachable, + }; + for (elem_buf, 0..) |*elem, elem_idx| { + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const lhs = main_block.addBinOp(l, .array_elem_val, orig_payload.lhs, elem_idx_ref).toRef(); + const rhs = main_block.addBinOp(l, .array_elem_val, orig_payload.rhs, elem_idx_ref).toRef(); + elem.* = main_block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef(); + } + }, + .shuffle_one => { + const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst); + for (elem_buf, shuffle.mask) |*elem, mask| elem.* = switch (mask.unwrap()) { + .value => |val| .fromIntern(val), + .elem => |src_idx| elem: { + const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx)); + break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand, src_idx_ref).toRef(); + }, + }; + }, + .shuffle_two => { + const shuffle = l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst); + const scalar_ty = res_ty.childType(zcu); + for (elem_buf, shuffle.mask) |*elem, mask| elem.* = switch (mask.unwrap()) { + .undef => .fromValue(try pt.undefValue(scalar_ty)), + .a_elem => |src_idx| elem: { + const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx)); + break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand_a, src_idx_ref).toRef(); + }, + .b_elem => |src_idx| elem: { + const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx)); + break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand_b, src_idx_ref).toRef(); + }, + }; + }, + .select => { + const orig_cond = orig.data.pl_op.operand; + const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data; + const res_scalar_ty = res_ty.childType(zcu); + for (elem_buf, 0..) |*elem, elem_idx| { + // Payload to be populated later; we need the index early for `br`s. 
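+                // The `then`/`else` blocks below each `br` to this block with the selected element.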
+ const elem_block_inst = main_block.add(l, .{ + .tag = .block, + .data = .{ .ty_pl = .{ + .ty = .fromType(res_scalar_ty), + .payload = undefined, + } }, + }); + var elem_block: Block = .init(main_block.stealCapacity(2)); - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const res_elem_ty = res_ty.childType(zcu); - const res_len = res_ty.arrayLen(zcu); + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const cond = elem_block.addBinOp(l, .array_elem_val, orig_cond, elem_idx_ref).toRef(); + var condbr: CondBr = .init(l, cond, &elem_block, .{}); - var inst_buf: [16]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + condbr.then_block = .init(main_block.stealCapacity(2)); + const lhs = condbr.then_block.addBinOp(l, .array_elem_val, orig_bin.lhs, elem_idx_ref).toRef(); + condbr.then_block.addBr(l, elem_block_inst, lhs); - var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + condbr.else_block = .init(main_block.stealCapacity(2)); + const rhs = condbr.else_block.addBinOp(l, .array_elem_val, orig_bin.rhs, elem_idx_ref).toRef(); + condbr.else_block.addBr(l, elem_block_inst, rhs); - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .ptr_elem_ptr, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef((try pt.singleMutPtrType(res_elem_ty)).toIntern()), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = res_alloc_inst.toRef(), - .rhs = cur_index_inst.toRef(), - }), - } }, - }).toRef(), - .rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig_ty_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef()), - } }, - }); + try condbr.finish(l); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_len - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); + const inst_data = l.air_instructions.items(.data); + inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body()); + + elem.* = elem_block_inst.toRef(); } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = 
loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try loop_cond_br.finish(l); - } - try loop.finish(l); + }, } + + const result = main_block.add(l, .{ + .tag = .aggregate_init, + .data = .{ .ty_pl = .{ + .ty = .fromType(res_ty), + .payload = payload: { + const idx = l.air_extra.items.len; + try l.air_extra.appendSlice(gpa, @ptrCast(elem_buf)); + break :payload @intCast(idx); + }, + } }, + }).toRef(); + + main_block.addBr(l, orig_inst, result); + + // Some `form` values may intentionally not use the full instruction buffer. + switch (form) { + .un_op, + .ty_op, + .bin_op, + .pl_op_bin, + .cmp_vector, + .select, + => {}, + .shuffle_one, + .shuffle_two, + => _ = main_block.stealRemainingCapacity(), + } + return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), + .ty = .fromType(res_ty), + .payload = try l.addBlockBody(main_block.body()), } }; } -fn scalarizeBitcastOperandBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { +fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; + const gpa = zcu.gpa; - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const operand_ty = l.typeOf(orig_ty_op.operand); - const int_bits: u16 = @intCast(operand_ty.bitSize(zcu)); - const int_ty = try pt.intType(.unsigned, int_bits); - const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits)); - const elem_bits: u16 = @intCast(operand_ty.childType(zcu).bitSize(zcu)); - const elem_int_ty = try pt.intType(.unsigned, elem_bits); - - var inst_buf: [22]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + var sfba_state = std.heap.stackFallback(512, gpa); + const sfba = sfba_state.get(); - var res_block: Block = .init(&inst_buf); - { - const int_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(int_ty) }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = int_alloc_inst.toRef(), - .rhs = try pt.intRef(int_ty, 0), - } }, - }); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - const cur_int_inst = loop.block.add(l, .{ - .tag = .bit_or, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .shl_exact, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(int_ty.toIntern()), - .operand = loop.block.addBitCast(l, elem_int_ty, loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig_ty_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef()), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .mul, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ 
- .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(shift_ty.toIntern()), - .operand = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = try pt.intRef(shift_ty, elem_bits), - } }, - }).toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(int_ty.toIntern()), - .operand = int_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); + const dest_ty = ty_op.ty.toType(); + const dest_legal = switch (dest_ty.zigTypeTag(zcu)) { + else => true, + .array, .vector => legal: { + if (dest_ty.arrayLen(zcu) == 1) break :legal true; + const dest_elem_ty = dest_ty.childType(zcu); + break :legal dest_elem_ty.bitSize(zcu) == 8 * dest_elem_ty.abiSize(zcu); + }, + }; - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, operand_ty.arrayLen(zcu) - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = int_alloc_inst.toRef(), - .rhs = cur_int_inst.toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); - } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.addBitCast(l, res_ty, cur_int_inst.toRef()), - } }, - }); - try loop_cond_br.finish(l); - } - try loop.finish(l); - } - return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), - } }; -} -fn scalarizeBitcastResultArrayBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { - const pt = l.pt; - const zcu = pt.zcu; + const operand_ty = l.typeOf(ty_op.operand); + const operand_legal = switch (operand_ty.zigTypeTag(zcu)) { + else => true, + .array, .vector => legal: { + if (operand_ty.arrayLen(zcu) == 1) break :legal true; + const operand_elem_ty = operand_ty.childType(zcu); + break :legal operand_elem_ty.bitSize(zcu) == 8 * operand_elem_ty.abiSize(zcu); + }, + }; - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const int_bits: u16 = @intCast(res_ty.bitSize(zcu)); - const int_ty = try pt.intType(.unsigned, int_bits); - const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits)); - const res_elem_ty = res_ty.childType(zcu); - const elem_bits: u16 = @intCast(res_elem_ty.bitSize(zcu)); - const elem_int_ty = try pt.intType(.unsigned, elem_bits); - - var inst_buf: [20]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + if (dest_legal and operand_legal) return null; - var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const int_ref = res_block.addBitCast(l, int_ty, orig_ty_op.operand); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = 
.ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + if (!operand_legal and !dest_legal and operand_ty.arrayLen(zcu) == dest_ty.arrayLen(zcu)) { + // from_ty and to_ty are both arrays or vectors of types with the same bit size, + // so we can do an elementwise bitcast. + return try l.scalarizeBlockPayload(orig_inst, .ty_op); + } - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .ptr_elem_ptr, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef((try pt.singleMutPtrType(res_elem_ty)).toIntern()), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = res_alloc_inst.toRef(), - .rhs = cur_index_inst.toRef(), - }), - } }, - }).toRef(), - .rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{ - .tag = .trunc, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(elem_int_ty.toIntern()), - .operand = loop.block.add(l, .{ - .tag = .shr, - .data = .{ .bin_op = .{ - .lhs = int_ref, - .rhs = loop.block.add(l, .{ - .tag = .mul, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(shift_ty.toIntern()), - .operand = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = try pt.intRef(shift_ty, elem_bits), - } }, - }).toRef(), - } }, - }).toRef(), - } }, - }).toRef()), - } }, - }); + // Fallback path. Our strategy is to use an unsigned integer type as an intermediate + // "bag of bits" representation which can be manipulated by bitwise operations. 
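+    // For example, bitcasting a `@Vector(2, u24)` to a `@Vector(4, u12)` packs both `u24`
+    // elements into a `u48`, then unpacks four `u12` elements from that `u48` below.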
- var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_ty.arrayLen(zcu) - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); - } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try loop_cond_br.finish(l); + const num_bits: u16 = @intCast(dest_ty.bitSize(zcu)); + assert(operand_ty.bitSize(zcu) == num_bits); + const uint_ty = try pt.intType(.unsigned, num_bits); + const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits)); + + const inst_buf = try sfba.alloc(Air.Inst.Index, len: { + const operand_to_uint_len: u64 = if (operand_legal) 1 else (operand_ty.arrayLen(zcu) * 5); + const uint_to_dest_len: u64 = if (dest_legal) 1 else (dest_ty.arrayLen(zcu) * 3 + 1); + break :len @intCast(operand_to_uint_len + uint_to_dest_len + 1); + }); + defer sfba.free(inst_buf); + var main_block: Block = .init(inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); + + // First, convert `operand_ty` to `uint_ty` (`uN`). 
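+    // Elements are OR-ed in from the last to the first, so that element 0 of the operand
+    // ends up in the least significant bits of the integer.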
+ + const uint_val: Air.Inst.Ref = uint_val: { + if (operand_legal) break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand); + + const bits_per_elem: u16 = @intCast(operand_ty.childType(zcu).bitSize(zcu)); + const bits_per_elem_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, bits_per_elem)); + const elem_uint_ty = try pt.intType(.unsigned, bits_per_elem); + + var cur_uint: Air.Inst.Ref = .fromValue(try pt.intValue(uint_ty, 0)); + var elem_idx = operand_ty.arrayLen(zcu); + while (elem_idx > 0) { + elem_idx -= 1; + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const orig_elem = main_block.addBinOp(l, .array_elem_val, ty_op.operand, elem_idx_ref).toRef(); + const elem_as_uint = main_block.addBitCast(l, elem_uint_ty, orig_elem); + const elem_extended = main_block.addTyOp(l, .intcast, uint_ty, elem_as_uint).toRef(); + cur_uint = main_block.addBinOp(l, .shl_exact, cur_uint, bits_per_elem_ref).toRef(); + cur_uint = main_block.addBinOp(l, .bit_or, cur_uint, elem_extended).toRef(); } - try loop.finish(l); - } - return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), - } }; -} -fn scalarizeBitcastResultVectorBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { - const pt = l.pt; - const zcu = pt.zcu; + break :uint_val cur_uint; + }; - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const int_bits: u16 = @intCast(res_ty.bitSize(zcu)); - const int_ty = try pt.intType(.unsigned, int_bits); - const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits)); - const res_elem_ty = res_ty.childType(zcu); - const elem_bits: u16 = @intCast(res_elem_ty.bitSize(zcu)); - const elem_int_ty = try pt.intType(.unsigned, elem_bits); - - var inst_buf: [19]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + // Now convert `uint_ty` (`uN`) to `dest_ty`. 
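+    // Elements are extracted starting from the least significant bits, mirroring the
+    // packing loop above, and the result is built by-value with an `aggregate_init`.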
- var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const int_ref = res_block.addBitCast(l, int_ty, orig_ty_op.operand); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + const result: Air.Inst.Ref = result: { + if (dest_legal) break :result main_block.addBitCast(l, dest_ty, uint_val); - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = res_alloc_inst.toRef(), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = cur_index_inst.toRef(), - .rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{ - .tag = .trunc, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(elem_int_ty.toIntern()), - .operand = loop.block.add(l, .{ - .tag = .shr, - .data = .{ .bin_op = .{ - .lhs = int_ref, - .rhs = loop.block.add(l, .{ - .tag = .mul, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(shift_ty.toIntern()), - .operand = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = try pt.intRef(shift_ty, elem_bits), - } }, - }).toRef(), - } }, - }).toRef(), - } }, - }).toRef()), - }), - } }, - }); + const elem_ty = dest_ty.childType(zcu); + const bits_per_elem: u16 = @intCast(elem_ty.bitSize(zcu)); + const bits_per_elem_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, bits_per_elem)); + const elem_uint_ty = try pt.intType(.unsigned, bits_per_elem); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_ty.vectorLen(zcu) - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); - } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try loop_cond_br.finish(l); + const elem_buf = try sfba.alloc(Air.Inst.Ref, dest_ty.arrayLen(zcu)); + defer sfba.free(elem_buf); + + var cur_uint = uint_val; + for (elem_buf) |*elem| { + const elem_as_uint = main_block.addTyOp(l, .trunc, elem_uint_ty, cur_uint).toRef(); + elem.* = main_block.addBitCast(l, elem_ty, elem_as_uint); + cur_uint = main_block.addBinOp(l, .shr, cur_uint, bits_per_elem_ref).toRef(); } - try loop.finish(l); - } + + break :result main_block.add(l, .{ + .tag = .aggregate_init, + .data = 
.{ .ty_pl = .{ + .ty = .fromType(dest_ty), + .payload = payload: { + const idx = l.air_extra.items.len; + try l.air_extra.appendSlice(gpa, @ptrCast(elem_buf)); + break :payload @intCast(idx); + }, + } }, + }).toRef(); + }; + + main_block.addBr(l, orig_inst, result); + return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), + .ty = .fromType(dest_ty), + .payload = try l.addBlockBody(main_block.body()), } }; } fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; + const gpa = zcu.gpa; + + var sfba_state = std.heap.stackFallback(512, gpa); + const sfba = sfba_state.get(); const orig = l.air_instructions.get(@intFromEnum(orig_inst)); - const res_ty = l.typeOfIndex(orig_inst); - const wrapped_res_ty = res_ty.fieldType(0, zcu); - const wrapped_res_scalar_ty = wrapped_res_ty.childType(zcu); - const res_len = wrapped_res_ty.vectorLen(zcu); + const orig_operands = l.extraData(Air.Bin, orig.data.ty_pl.payload).data; - var inst_buf: [21]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + const vec_tuple_ty = l.typeOfIndex(orig_inst); + const vec_int_ty = vec_tuple_ty.fieldType(0, zcu); + const vec_overflow_ty = vec_tuple_ty.fieldType(1, zcu); - var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const ptr_wrapped_res_inst = res_block.add(l, .{ - .tag = .struct_field_ptr_index_0, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef((try pt.singleMutPtrType(wrapped_res_ty)).toIntern()), - .operand = res_alloc_inst.toRef(), + assert(l.typeOf(orig_operands.lhs).toIntern() == vec_int_ty.toIntern()); + if (orig.tag != .shl_with_overflow) { + assert(l.typeOf(orig_operands.rhs).toIntern() == vec_int_ty.toIntern()); + } + + const scalar_int_ty = vec_int_ty.childType(zcu); + const scalar_tuple_ty = try pt.overflowArithmeticTupleType(scalar_int_ty); + + const elems_len = vec_int_ty.vectorLen(zcu); + + const inst_buf = try sfba.alloc(Air.Inst.Index, 5 * elems_len + 4); + defer sfba.free(inst_buf); + + var main_block: Block = .init(inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); + + const int_elem_buf = try sfba.alloc(Air.Inst.Ref, elems_len); + defer sfba.free(int_elem_buf); + const overflow_elem_buf = try sfba.alloc(Air.Inst.Ref, elems_len); + defer sfba.free(overflow_elem_buf); + + for (int_elem_buf, overflow_elem_buf, 0..) 
|*int_elem, *overflow_elem, elem_idx| { + const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); + const lhs = main_block.addBinOp(l, .array_elem_val, orig_operands.lhs, elem_idx_ref).toRef(); + const rhs = main_block.addBinOp(l, .array_elem_val, orig_operands.rhs, elem_idx_ref).toRef(); + const elem_result = main_block.add(l, .{ + .tag = orig.tag, + .data = .{ .ty_pl = .{ + .ty = .fromType(scalar_tuple_ty), + .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), } }, - }); - const ptr_overflow_res_inst = res_block.add(l, .{ - .tag = .struct_field_ptr_index_1, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef((try pt.singleMutPtrType(res_ty.fieldType(1, zcu))).toIntern()), - .operand = res_alloc_inst.toRef(), + }).toRef(); + int_elem.* = main_block.add(l, .{ + .tag = .struct_field_val, + .data = .{ .ty_pl = .{ + .ty = .fromType(scalar_int_ty), + .payload = try l.addExtra(Air.StructField, .{ + .struct_operand = elem_result, + .field_index = 0, + }), } }, - }); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, + }).toRef(); + overflow_elem.* = main_block.add(l, .{ + .tag = .struct_field_val, + .data = .{ .ty_pl = .{ + .ty = .bool_type, + .payload = try l.addExtra(Air.StructField, .{ + .struct_operand = elem_result, + .field_index = 1, + }), } }, - }); + }).toRef(); + } - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - const extra = l.extraData(Air.Bin, orig.data.ty_pl.payload).data; - const res_elem = loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef(try zcu.intern_pool.getTupleType(zcu.gpa, pt.tid, .{ - .types = &.{ wrapped_res_scalar_ty.toIntern(), .u1_type }, - .values = &(.{.none} ** 2), - })), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - }), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = ptr_overflow_res_inst.toRef(), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = cur_index_inst.toRef(), - .rhs = loop.block.add(l, .{ - .tag = .struct_field_val, - .data = .{ .ty_pl = .{ - .ty = .u1_type, - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = res_elem.toRef(), - .field_index = 1, - }), - } }, - }).toRef(), - }), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = ptr_wrapped_res_inst.toRef(), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = cur_index_inst.toRef(), - .rhs = loop.block.add(l, .{ - .tag = .struct_field_val, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef(wrapped_res_scalar_ty.toIntern()), - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = res_elem.toRef(), - .field_index = 0, - }), - } }, - }).toRef(), - }), - } }, - }); + const int_vec = main_block.add(l, .{ + .tag = .aggregate_init, + .data = .{ .ty_pl = .{ + .ty = 
.fromType(vec_int_ty), + .payload = payload: { + const idx = l.air_extra.items.len; + try l.air_extra.appendSlice(gpa, @ptrCast(int_elem_buf)); + break :payload @intCast(idx); + }, + } }, + }).toRef(); + const overflow_vec = main_block.add(l, .{ + .tag = .aggregate_init, + .data = .{ .ty_pl = .{ + .ty = .fromType(vec_overflow_ty), + .payload = payload: { + const idx = l.air_extra.items.len; + try l.air_extra.appendSlice(gpa, @ptrCast(overflow_elem_buf)); + break :payload @intCast(idx); + }, + } }, + }).toRef(); + + const tuple_elems: [2]Air.Inst.Ref = .{ int_vec, overflow_vec }; + const result = main_block.add(l, .{ + .tag = .aggregate_init, + .data = .{ .ty_pl = .{ + .ty = .fromType(vec_tuple_ty), + .payload = payload: { + const idx = l.air_extra.items.len; + try l.air_extra.appendSlice(gpa, @ptrCast(&tuple_elems)); + break :payload @intCast(idx); + }, + } }, + }).toRef(); + + main_block.addBr(l, orig_inst, result); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_len - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); - } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try loop_cond_br.finish(l); - } - try loop.finish(l); - } return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), + .ty = .fromType(vec_tuple_ty), + .payload = try l.addBlockBody(main_block.body()), } }; } @@ -2231,37 +1472,6 @@ fn safeArithmeticBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, overflow_ } }; } -fn expandBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { - const pt = l.pt; - const zcu = pt.zcu; - const ip = &zcu.intern_pool; - - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const res_ty_key = ip.indexToKey(res_ty.toIntern()); - const operand_ty = l.typeOf(orig_ty_op.operand); - const operand_ty_key = ip.indexToKey(operand_ty.toIntern()); - _ = res_ty_key; - _ = operand_ty_key; - - var inst_buf: [1]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); - - var res_block: Block = .init(&inst_buf); - { - _ = res_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = try pt.undefRef(res_ty), - } }, - }); - } - return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), - } }; -} fn packedLoadBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; @@ -2431,89 +1641,73 @@ fn packedStructFieldValBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Err const 
field_ty = orig_ty_pl.ty.toType(); const agg_ty = l.typeOf(orig_extra.struct_operand); + const agg_bits: u16 = @intCast(agg_ty.bitSize(zcu)); + const bit_offset = zcu.structPackedFieldBitOffset(zcu.typeToStruct(agg_ty).?, orig_extra.field_index); + + const agg_int_ty = try pt.intType(.unsigned, agg_bits); + const field_int_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu))); + + const agg_shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, agg_bits)); + const bit_offset_ref: Air.Inst.Ref = .fromValue(try pt.intValue(agg_shift_ty, bit_offset)); + var inst_buf: [5]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); - var res_block: Block = .init(&inst_buf); - { - const agg_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(agg_ty) }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = agg_alloc_inst.toRef(), - .rhs = orig_extra.struct_operand, - } }, - }); - _ = res_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = res_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(field_ty.toIntern()), - .operand = (try res_block.addStructFieldPtr(l, agg_alloc_inst.toRef(), orig_extra.field_index)).toRef(), - } }, - }).toRef(), - } }, - }); - } + const agg_int = main_block.addBitCast(l, agg_int_ty, orig_extra.struct_operand); + const shifted_agg_int = main_block.addBinOp(l, .shr, agg_int, bit_offset_ref).toRef(); + const field_int = main_block.addTyOp(l, .trunc, field_int_ty, shifted_agg_int).toRef(); + const field_val = main_block.addBitCast(l, field_ty, field_int); + main_block.addBr(l, orig_inst, field_val); + return .{ .ty_pl = .{ - .ty = Air.internedToRef(field_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), + .ty = .fromType(field_ty), + .payload = try l.addBlockBody(main_block.body()), } }; } fn packedAggregateInitBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; + const gpa = zcu.gpa; const orig_ty_pl = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_pl; - const field_ty = orig_ty_pl.ty.toType(); const agg_ty = orig_ty_pl.ty.toType(); const agg_field_count = agg_ty.structFieldCount(zcu); - const ExpectedContents = [1 + 2 * 32 + 2]Air.Inst.Index; - var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = - std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa); - const gpa = stack.get(); + var sfba_state = std.heap.stackFallback(@sizeOf([4 * 32 + 2]Air.Inst.Index), gpa); + const sfba = sfba_state.get(); - const inst_buf = try gpa.alloc(Air.Inst.Index, 1 + 2 * agg_field_count + 2); - defer gpa.free(inst_buf); - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + const inst_buf = try sfba.alloc(Air.Inst.Index, 4 * agg_field_count + 2); + defer sfba.free(inst_buf); - var res_block: Block = .init(inst_buf); - { - const agg_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(agg_ty) }, - }); - for (0..agg_field_count, orig_ty_pl.payload..) 
|field_index, extra_index| _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = (try res_block.addStructFieldPtr(l, agg_alloc_inst.toRef(), field_index)).toRef(), - .rhs = @enumFromInt(l.air_extra.items[extra_index]), - } }, - }); - _ = res_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = res_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(field_ty.toIntern()), - .operand = agg_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); + var main_block: Block = .init(inst_buf); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); + + const num_bits: u16 = @intCast(agg_ty.bitSize(zcu)); + const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits)); + const uint_ty = try pt.intType(.unsigned, num_bits); + var cur_uint: Air.Inst.Ref = .fromValue(try pt.intValue(uint_ty, 0)); + + var field_idx = agg_field_count; + while (field_idx > 0) { + field_idx -= 1; + const field_ty = agg_ty.fieldType(field_idx, zcu); + const field_uint_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu))); + const field_bit_size_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, field_ty.bitSize(zcu))); + const field_val: Air.Inst.Ref = @enumFromInt(l.air_extra.items[orig_ty_pl.payload + field_idx]); + + const shifted = main_block.addBinOp(l, .shl_exact, cur_uint, field_bit_size_ref).toRef(); + const field_as_uint = main_block.addBitCast(l, field_uint_ty, field_val); + const field_extended = main_block.addTyOp(l, .intcast, uint_ty, field_as_uint).toRef(); + cur_uint = main_block.addBinOp(l, .bit_or, shifted, field_extended).toRef(); } + + const result = main_block.addBitCast(l, agg_ty, cur_uint); + main_block.addBr(l, orig_inst, result); + return .{ .ty_pl = .{ - .ty = Air.internedToRef(field_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), + .ty = .fromType(agg_ty), + .payload = try l.addBlockBody(main_block.body()), } }; } @@ -2571,6 +1765,33 @@ const Block = struct { b.len += 1; return inst; } + fn addBr(b: *Block, l: *Legalize, target: Air.Inst.Index, operand: Air.Inst.Ref) void { + _ = b.add(l, .{ + .tag = .br, + .data = .{ .br = .{ .block_inst = target, .operand = operand } }, + }); + } + fn addBinOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) Air.Inst.Index { + return b.add(l, .{ + .tag = tag, + .data = .{ .bin_op = .{ .lhs = lhs, .rhs = rhs } }, + }); + } + fn addUnOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, operand: Air.Inst.Ref) Air.Inst.Index { + return b.add(l, .{ + .tag = tag, + .data = .{ .un_op = operand }, + }); + } + fn addTyOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type, operand: Air.Inst.Ref) Air.Inst.Index { + return b.add(l, .{ + .tag = tag, + .data = .{ .ty_op = .{ + .ty = .fromType(ty), + .operand = operand, + } }, + }); + } /// Adds the code to call the panic handler `panic_id`. This is usually `.call` then `.unreach`, /// but if `Zcu.Feature.panic_fn` is unsupported, we lower to `.trap` instead. @@ -2625,14 +1846,27 @@ const Block = struct { } }, }); } + return addCmpScalar(b, l, op, lhs, rhs, opts.optimized); + } + + /// Similar to `addCmp`, but for scalars only. Unlike `addCmp`, this function is + /// infallible, because it doesn't need to add entries to `extra`. 
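+ /// For example, `addCmpScalar(b, l, .lt, x, y, false)` appends a plain `cmp_lt`
+ /// comparing `x` and `y`, while passing `optimized = true` would select
+ /// `cmp_lt_optimized` instead (an illustrative usage sketch, not a call site
+ /// taken from this change).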
+ fn addCmpScalar( + b: *Block, + l: *Legalize, + op: std.math.CompareOperator, + lhs: Air.Inst.Ref, + rhs: Air.Inst.Ref, + optimized: bool, + ) Air.Inst.Index { return b.add(l, .{ .tag = switch (op) { - .lt => if (opts.optimized) .cmp_lt_optimized else .cmp_lt, - .lte => if (opts.optimized) .cmp_lte_optimized else .cmp_lte, - .eq => if (opts.optimized) .cmp_eq_optimized else .cmp_eq, - .gte => if (opts.optimized) .cmp_gte_optimized else .cmp_gte, - .gt => if (opts.optimized) .cmp_gt_optimized else .cmp_gt, - .neq => if (opts.optimized) .cmp_neq_optimized else .cmp_neq, + .lt => if (optimized) .cmp_lt_optimized else .cmp_lt, + .lte => if (optimized) .cmp_lte_optimized else .cmp_lte, + .eq => if (optimized) .cmp_eq_optimized else .cmp_eq, + .gte => if (optimized) .cmp_gte_optimized else .cmp_gte, + .gt => if (optimized) .cmp_gt_optimized else .cmp_gt, + .neq => if (optimized) .cmp_neq_optimized else .cmp_neq, }, .data = .{ .bin_op = .{ .lhs = lhs, @@ -2641,93 +1875,6 @@ const Block = struct { }); } - /// Adds a `struct_field_ptr*` instruction to `b`. This is a fairly thin wrapper around `add` - /// that selects the optimized instruction encoding to use, although it does compute the - /// proper field pointer type. - fn addStructFieldPtr( - b: *Block, - l: *Legalize, - struct_operand: Air.Inst.Ref, - field_index: usize, - ) Error!Air.Inst.Index { - const pt = l.pt; - const zcu = pt.zcu; - - const agg_ptr_ty = l.typeOf(struct_operand); - const agg_ptr_info = agg_ptr_ty.ptrInfo(zcu); - const agg_ty: Type = .fromInterned(agg_ptr_info.child); - const agg_ptr_align = switch (agg_ptr_info.flags.alignment) { - .none => agg_ty.abiAlignment(zcu), - else => |agg_ptr_align| agg_ptr_align, - }; - const agg_layout = agg_ty.containerLayout(zcu); - const field_ty = agg_ty.fieldType(field_index, zcu); - var field_ptr_info: InternPool.Key.PtrType = .{ - .child = field_ty.toIntern(), - .flags = .{ - .is_const = agg_ptr_info.flags.is_const, - .is_volatile = agg_ptr_info.flags.is_volatile, - .address_space = agg_ptr_info.flags.address_space, - }, - }; - field_ptr_info.flags.alignment = field_ptr_align: switch (agg_layout) { - .auto => agg_ty.fieldAlignment(field_index, zcu).min(agg_ptr_align), - .@"extern" => switch (agg_ty.zigTypeTag(zcu)) { - else => unreachable, - .@"struct" => .fromLog2Units(@min( - agg_ptr_align.toLog2Units(), - @ctz(agg_ty.structFieldOffset(field_index, zcu)), - )), - .@"union" => agg_ptr_align, - }, - .@"packed" => switch (agg_ty.zigTypeTag(zcu)) { - else => unreachable, - .@"struct" => { - const packed_offset = agg_ty.packedStructFieldPtrInfo(agg_ptr_ty, @intCast(field_index), pt); - field_ptr_info.packed_offset = packed_offset; - break :field_ptr_align agg_ptr_align; - }, - .@"union" => { - field_ptr_info.packed_offset = .{ - .host_size = switch (agg_ptr_info.packed_offset.host_size) { - 0 => @intCast(agg_ty.abiSize(zcu)), - else => |host_size| host_size, - }, - .bit_offset = agg_ptr_info.packed_offset.bit_offset, - }; - break :field_ptr_align agg_ptr_align; - }, - }, - }; - const field_ptr_ty = try pt.ptrType(field_ptr_info); - const field_ptr_ty_ref = Air.internedToRef(field_ptr_ty.toIntern()); - return switch (field_index) { - inline 0...3 => |ct_field_index| b.add(l, .{ - .tag = switch (ct_field_index) { - 0 => .struct_field_ptr_index_0, - 1 => .struct_field_ptr_index_1, - 2 => .struct_field_ptr_index_2, - 3 => .struct_field_ptr_index_3, - else => comptime unreachable, - }, - .data = .{ .ty_op = .{ - .ty = field_ptr_ty_ref, - .operand = struct_operand, - } }, - }), - else => 
b.add(l, .{ - .tag = .struct_field_ptr, - .data = .{ .ty_pl = .{ - .ty = field_ptr_ty_ref, - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = struct_operand, - .field_index = @intCast(field_index), - }), - } }, - }), - }; - } - /// Adds a `bitcast` instruction to `b`. This is a thin wrapper that omits the instruction for /// no-op casts. fn addBitCast( @@ -2774,56 +1921,6 @@ const Block = struct { } }; -const Result = struct { - inst: Air.Inst.Index, - block: Block, - - /// The return value has `block` initialized to `undefined`; it is the caller's reponsibility - /// to initialize it. - fn init(l: *Legalize, ty: Type, parent_block: *Block) Result { - return .{ - .inst = parent_block.add(l, .{ - .tag = .block, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef(ty.toIntern()), - .payload = undefined, - } }, - }), - .block = undefined, - }; - } - - fn finish(res: Result, l: *Legalize) Error!void { - const data = &l.air_instructions.items(.data)[@intFromEnum(res.inst)]; - data.ty_pl.payload = try l.addBlockBody(res.block.body()); - } -}; - -const Loop = struct { - inst: Air.Inst.Index, - block: Block, - - /// The return value has `block` initialized to `undefined`; it is the caller's reponsibility - /// to initialize it. - fn init(l: *Legalize, parent_block: *Block) Loop { - return .{ - .inst = parent_block.add(l, .{ - .tag = .loop, - .data = .{ .ty_pl = .{ - .ty = .noreturn_type, - .payload = undefined, - } }, - }), - .block = undefined, - }; - } - - fn finish(loop: Loop, l: *Legalize) Error!void { - const data = &l.air_instructions.items(.data)[@intFromEnum(loop.inst)]; - data.ty_pl.payload = try l.addBlockBody(loop.block.body()); - } -}; - const CondBr = struct { inst: Air.Inst.Index, hints: Air.CondBr.BranchHints, From c091e27aac9d51cb3af06904c3039a8c316e5b89 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Thu, 30 Oct 2025 09:20:04 +0000 Subject: [PATCH 2/8] compiler: spring cleaning I started this diff trying to remove a little dead code from the C backend, but ended up finding a bunch of dead code sprinkled all over the place: * `packed` handling in the C backend which was made dead by `Legalize` * Representation of pointers to runtime-known vector indices * Handling for the `vector_store_elem` AIR instruction (now removed) * Old tuple handling from when they used the InternPool repr of structs * Straightforward unused functions * TODOs in the LLVM backend for features which Zig just does not support --- src/Air.zig | 11 - src/Air/Liveness.zig | 6 - src/Air/Liveness/Verify.zig | 5 - src/Air/print.zig | 12 - src/Air/types_resolved.zig | 7 - src/InternPool.zig | 7 +- src/Sema.zig | 122 +-- src/Sema/comptime_ptr_access.zig | 2 - src/Type.zig | 8 +- src/Value.zig | 171 +--- src/Zcu/PerThread.zig | 26 +- src/codegen/aarch64/Select.zig | 12 - src/codegen/c.zig | 528 ++-------- src/codegen/c/Type.zig | 6 +- src/codegen/llvm.zig | 53 +- src/codegen/riscv64/CodeGen.zig | 1 - src/codegen/sparc64/CodeGen.zig | 1 - src/codegen/spirv/CodeGen.zig | 28 +- src/codegen/wasm/CodeGen.zig | 1 - src/codegen/x86_64/CodeGen.zig | 1617 +----------------------------- src/link/Dwarf.zig | 28 +- 21 files changed, 210 insertions(+), 2442 deletions(-) diff --git a/src/Air.zig b/src/Air.zig index 1b394ca1c161..db5307f4591a 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -874,10 +874,6 @@ pub const Inst = struct { /// Uses the `ty_pl` field. save_err_return_trace_index, - /// Store an element to a vector pointer at an index. - /// Uses the `vector_store_elem` field. 
- vector_store_elem, - /// Compute a pointer to a `Nav` at runtime, always one of: /// /// * `threadlocal var` @@ -1220,11 +1216,6 @@ pub const Inst = struct { operand: Ref, operation: std.builtin.ReduceOp, }, - vector_store_elem: struct { - vector_ptr: Ref, - // Index into a different array. - payload: u32, - }, ty_nav: struct { ty: InternPool.Index, nav: InternPool.Nav.Index, @@ -1689,7 +1680,6 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .set_union_tag, .prefetch, .set_err_return_trace, - .vector_store_elem, .c_va_end, => return .void, @@ -1857,7 +1847,6 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .prefetch, .wasm_memory_grow, .set_err_return_trace, - .vector_store_elem, .c_va_arg, .c_va_copy, .c_va_end, diff --git a/src/Air/Liveness.zig b/src/Air/Liveness.zig index 58169730e880..ea170d0893d6 100644 --- a/src/Air/Liveness.zig +++ b/src/Air/Liveness.zig @@ -463,12 +463,6 @@ fn analyzeInst( return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none }); }, - .vector_store_elem => { - const o = inst_datas[@intFromEnum(inst)].vector_store_elem; - const extra = a.air.extraData(Air.Bin, o.payload).data; - return analyzeOperands(a, pass, data, inst, .{ o.vector_ptr, extra.lhs, extra.rhs }); - }, - .arg, .alloc, .ret_ptr, diff --git a/src/Air/Liveness/Verify.zig b/src/Air/Liveness/Verify.zig index a1cce26a64f3..2f50937bbe40 100644 --- a/src/Air/Liveness/Verify.zig +++ b/src/Air/Liveness/Verify.zig @@ -322,11 +322,6 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { const extra = self.air.extraData(Air.Bin, pl_op.payload).data; try self.verifyInstOperands(inst, .{ extra.lhs, extra.rhs, pl_op.operand }); }, - .vector_store_elem => { - const vector_store_elem = data[@intFromEnum(inst)].vector_store_elem; - const extra = self.air.extraData(Air.Bin, vector_store_elem.payload).data; - try self.verifyInstOperands(inst, .{ vector_store_elem.vector_ptr, extra.lhs, extra.rhs }); - }, .cmpxchg_strong, .cmpxchg_weak, => { diff --git a/src/Air/print.zig b/src/Air/print.zig index 73cf2ed9b31e..4b44af3206b8 100644 --- a/src/Air/print.zig +++ b/src/Air/print.zig @@ -330,7 +330,6 @@ const Writer = struct { .shuffle_two => try w.writeShuffleTwo(s, inst), .reduce, .reduce_optimized => try w.writeReduce(s, inst), .cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst), - .vector_store_elem => try w.writeVectorStoreElem(s, inst), .runtime_nav_ptr => try w.writeRuntimeNavPtr(s, inst), .work_item_id, @@ -576,17 +575,6 @@ const Writer = struct { try w.writeOperand(s, inst, 1, extra.rhs); } - fn writeVectorStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void { - const data = w.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem; - const extra = w.air.extraData(Air.VectorCmp, data.payload).data; - - try w.writeOperand(s, inst, 0, data.vector_ptr); - try s.writeAll(", "); - try w.writeOperand(s, inst, 1, extra.lhs); - try s.writeAll(", "); - try w.writeOperand(s, inst, 2, extra.rhs); - } - fn writeRuntimeNavPtr(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void { const ip = &w.pt.zcu.intern_pool; const ty_nav = w.air.instructions.items(.data)[@intFromEnum(inst)].ty_nav; diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index 44669b82df87..cac981cb00d2 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -316,13 +316,6 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { if 
(!checkRef(data.prefetch.ptr, zcu)) return false; }, - .vector_store_elem => { - const bin = air.extraData(Air.Bin, data.vector_store_elem.payload).data; - if (!checkRef(data.vector_store_elem.vector_ptr, zcu)) return false; - if (!checkRef(bin.lhs, zcu)) return false; - if (!checkRef(bin.rhs, zcu)) return false; - }, - .runtime_nav_ptr => { if (!checkType(.fromInterned(data.ty_nav.ty), zcu)) return false; }, diff --git a/src/InternPool.zig b/src/InternPool.zig index 3bee7a3f941d..e53caf382f8d 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -2104,7 +2104,6 @@ pub const Key = union(enum) { pub const VectorIndex = enum(u16) { none = std.math.maxInt(u16), - runtime = std.math.maxInt(u16) - 1, _, }; @@ -3739,10 +3738,8 @@ pub const LoadedStructType = struct { return s.field_inits.get(ip)[i]; } - /// Returns `none` in the case the struct is a tuple. - pub fn fieldName(s: LoadedStructType, ip: *const InternPool, i: usize) OptionalNullTerminatedString { - if (s.field_names.len == 0) return .none; - return s.field_names.get(ip)[i].toOptional(); + pub fn fieldName(s: LoadedStructType, ip: *const InternPool, i: usize) NullTerminatedString { + return s.field_names.get(ip)[i]; } pub fn fieldIsComptime(s: LoadedStructType, ip: *const InternPool, i: usize) bool { diff --git a/src/Sema.zig b/src/Sema.zig index af342a0d046d..8b5c7c5de22b 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -15919,26 +15919,27 @@ fn zirOverflowArithmetic( }, .mul_with_overflow => { // If either of the arguments is zero, the result is zero and no overflow occured. + if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(zcu) and try lhs_val.compareAllWithZeroSema(.eq, pt)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; + } + } + if (maybe_rhs_val) |rhs_val| { + if (!rhs_val.isUndef(zcu) and try rhs_val.compareAllWithZeroSema(.eq, pt)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; + } + } // If either of the arguments is one, the result is the other and no overflow occured. - // Otherwise, if either of the arguments is undefined, both results are undefined. 
const scalar_one = try pt.intValue(dest_ty.scalarType(zcu), 1); + const vec_one = try sema.splat(dest_ty, scalar_one); if (maybe_lhs_val) |lhs_val| { - if (!lhs_val.isUndef(zcu)) { - if (try lhs_val.compareAllWithZeroSema(.eq, pt)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; - } else if (try sema.compareAll(lhs_val, .eq, try sema.splat(dest_ty, scalar_one), dest_ty)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; - } + if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; } } - if (maybe_rhs_val) |rhs_val| { - if (!rhs_val.isUndef(zcu)) { - if (try rhs_val.compareAllWithZeroSema(.eq, pt)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; - } else if (try sema.compareAll(rhs_val, .eq, try sema.splat(dest_ty, scalar_one), dest_ty)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; - } + if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; } } @@ -15947,7 +15948,6 @@ fn zirOverflowArithmetic( if (lhs_val.isUndef(zcu) or rhs_val.isUndef(zcu)) { break :result .{ .overflow_bit = .undef, .wrapped = .undef }; } - const result = try arith.mulWithOverflow(sema, dest_ty, lhs_val, rhs_val); break :result .{ .overflow_bit = result.overflow_bit, .wrapped = result.wrapped_result }; } @@ -17751,10 +17751,7 @@ fn zirTypeInfo(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai try ty.resolveStructFieldInits(pt); for (struct_field_vals, 0..) |*field_val, field_index| { - const field_name = if (struct_type.fieldName(ip, field_index).unwrap()) |field_name| - field_name - else - try ip.getOrPutStringFmt(gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls); + const field_name = struct_type.fieldName(ip, field_index); const field_name_len = field_name.length(ip); const field_ty: Type = .fromInterned(struct_type.field_types.get(ip)[field_index]); const field_init = struct_type.fieldInit(ip, field_index); @@ -28347,6 +28344,10 @@ fn elemPtrArray( break :o index; } else null; + if (offset == null and array_ty.zigTypeTag(zcu) == .vector) { + return sema.fail(block, elem_index_src, "vector index not comptime known", .{}); + } + const elem_ptr_ty = try array_ptr_ty.elemPtrType(offset, pt); if (maybe_undef_array_ptr_val) |array_ptr_val| { @@ -28364,10 +28365,6 @@ fn elemPtrArray( try sema.validateRuntimeValue(block, array_ptr_src, array_ptr); } - if (offset == null and array_ty.zigTypeTag(zcu) == .vector) { - return sema.fail(block, elem_index_src, "vector index not comptime known", .{}); - } - // Runtime check is only needed if unable to comptime check. if (oob_safety and block.wantSafety() and offset == null) { const len_inst = try pt.intRef(.usize, array_len); @@ -30399,22 +30396,6 @@ fn storePtr2( const is_ret = air_tag == .ret_ptr; - // Detect if we are storing an array operand to a bitcasted vector pointer. - // If so, we instead reach through the bitcasted pointer to the vector pointer, - // bitcast the array operand to a vector, and then lower this as a store of - // a vector value to a vector pointer. 
This generally results in better code, - // as well as working around an LLVM bug: - // https://github.com/ziglang/zig/issues/11154 - if (sema.obtainBitCastedVectorPtr(ptr)) |vector_ptr| { - const vector_ty = sema.typeOf(vector_ptr).childType(zcu); - const vector = sema.coerceExtra(block, vector_ty, uncasted_operand, operand_src, .{ .is_ret = is_ret }) catch |err| switch (err) { - error.NotCoercible => unreachable, - else => |e| return e, - }; - try sema.storePtr2(block, src, vector_ptr, ptr_src, vector, operand_src, .store); - return; - } - const operand = sema.coerceExtra(block, elem_ty, uncasted_operand, operand_src, .{ .is_ret = is_ret }) catch |err| switch (err) { error.NotCoercible => unreachable, else => |e| return e, @@ -30447,29 +30428,6 @@ fn storePtr2( try sema.requireRuntimeBlock(block, src, runtime_src); - if (ptr_ty.ptrInfo(zcu).flags.vector_index == .runtime) { - const ptr_inst = ptr.toIndex().?; - const air_tags = sema.air_instructions.items(.tag); - if (air_tags[@intFromEnum(ptr_inst)] == .ptr_elem_ptr) { - const ty_pl = sema.air_instructions.items(.data)[@intFromEnum(ptr_inst)].ty_pl; - const bin_op = sema.getTmpAir().extraData(Air.Bin, ty_pl.payload).data; - _ = try block.addInst(.{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = bin_op.lhs, - .payload = try block.sema.addExtra(Air.Bin{ - .lhs = bin_op.rhs, - .rhs = operand, - }), - } }, - }); - return; - } - return sema.fail(block, ptr_src, "unable to determine vector element index of type '{f}'", .{ - ptr_ty.fmt(pt), - }); - } - const store_inst = if (is_ret) try block.addBinOp(.store, ptr, operand) else @@ -30569,37 +30527,6 @@ fn markMaybeComptimeAllocRuntime(sema: *Sema, block: *Block, alloc_inst: Air.Ins } } -/// Traverse an arbitrary number of bitcasted pointers and return the underyling vector -/// pointer. Only if the final element type matches the vector element type, and the -/// lengths match. -fn obtainBitCastedVectorPtr(sema: *Sema, ptr: Air.Inst.Ref) ?Air.Inst.Ref { - const pt = sema.pt; - const zcu = pt.zcu; - const array_ty = sema.typeOf(ptr).childType(zcu); - if (array_ty.zigTypeTag(zcu) != .array) return null; - var ptr_ref = ptr; - var ptr_inst = ptr_ref.toIndex() orelse return null; - const air_datas = sema.air_instructions.items(.data); - const air_tags = sema.air_instructions.items(.tag); - const vector_ty = while (air_tags[@intFromEnum(ptr_inst)] == .bitcast) { - ptr_ref = air_datas[@intFromEnum(ptr_inst)].ty_op.operand; - if (!sema.isKnownZigType(ptr_ref, .pointer)) return null; - const child_ty = sema.typeOf(ptr_ref).childType(zcu); - if (child_ty.zigTypeTag(zcu) == .vector) break child_ty; - ptr_inst = ptr_ref.toIndex() orelse return null; - } else return null; - - // We have a pointer-to-array and a pointer-to-vector. If the elements and - // lengths match, return the result. - if (array_ty.childType(zcu).eql(vector_ty.childType(zcu), zcu) and - array_ty.arrayLen(zcu) == vector_ty.vectorLen(zcu)) - { - return ptr_ref; - } else { - return null; - } -} - /// Call when you have Value objects rather than Air instructions, and you want to /// assert the store must be done at comptime. 
fn storePtrVal( @@ -35579,8 +35506,13 @@ fn structFieldInits( const default_val = try sema.resolveConstValue(&block_scope, init_src, coerced, null); if (default_val.canMutateComptimeVarState(zcu)) { - const field_name = struct_type.fieldName(ip, field_i).unwrap().?; - return sema.failWithContainsReferenceToComptimeVar(&block_scope, init_src, field_name, "field default value", default_val); + return sema.failWithContainsReferenceToComptimeVar( + &block_scope, + init_src, + struct_type.fieldName(ip, field_i), + "field default value", + default_val, + ); } struct_type.field_inits.get(ip)[field_i] = default_val.toIntern(); } diff --git a/src/Sema/comptime_ptr_access.zig b/src/Sema/comptime_ptr_access.zig index 9441f8cf72a1..4e101ecd0f96 100644 --- a/src/Sema/comptime_ptr_access.zig +++ b/src/Sema/comptime_ptr_access.zig @@ -24,7 +24,6 @@ pub fn loadComptimePtr(sema: *Sema, block: *Block, src: LazySrcLoc, ptr: Value) const child_bits = Type.fromInterned(ptr_info.child).bitSize(zcu); const bit_offset = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) { .none => 0, - .runtime => return .runtime_load, else => |idx| switch (pt.zcu.getTarget().cpu.arch.endian()) { .little => child_bits * @intFromEnum(idx), .big => host_bits - child_bits * (@intFromEnum(idx) + 1), // element order reversed on big endian @@ -81,7 +80,6 @@ pub fn storeComptimePtr( }; const bit_offset = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) { .none => 0, - .runtime => return .runtime_store, else => |idx| switch (zcu.getTarget().cpu.arch.endian()) { .little => Type.fromInterned(ptr_info.child).bitSize(zcu) * @intFromEnum(idx), .big => host_bits - Type.fromInterned(ptr_info.child).bitSize(zcu) * (@intFromEnum(idx) + 1), // element order reversed on big endian diff --git a/src/Type.zig b/src/Type.zig index f3f5c9949179..d6e38420cb4e 100644 --- a/src/Type.zig +++ b/src/Type.zig @@ -198,9 +198,7 @@ pub fn print(ty: Type, writer: *std.Io.Writer, pt: Zcu.PerThread) std.Io.Writer. info.packed_offset.bit_offset, info.packed_offset.host_size, }); } - if (info.flags.vector_index == .runtime) { - try writer.writeAll(":?"); - } else if (info.flags.vector_index != .none) { + if (info.flags.vector_index != .none) { try writer.print(":{d}", .{@intFromEnum(info.flags.vector_index)}); } try writer.writeAll(") "); @@ -3113,7 +3111,7 @@ pub fn enumTagFieldIndex(ty: Type, enum_tag: Value, zcu: *const Zcu) ?u32 { pub fn structFieldName(ty: Type, index: usize, zcu: *const Zcu) InternPool.OptionalNullTerminatedString { const ip = &zcu.intern_pool; return switch (ip.indexToKey(ty.toIntern())) { - .struct_type => ip.loadStructType(ty.toIntern()).fieldName(ip, index), + .struct_type => ip.loadStructType(ty.toIntern()).fieldName(ip, index).toOptional(), .tuple_type => .none, else => unreachable, }; @@ -3985,7 +3983,7 @@ pub fn elemPtrType(ptr_ty: Type, offset: ?usize, pt: Zcu.PerThread) !Type { break :blk .{ .host_size = @intCast(parent_ty.arrayLen(zcu)), .alignment = parent_ty.abiAlignment(zcu), - .vector_index = if (offset) |some| @enumFromInt(some) else .runtime, + .vector_index = @enumFromInt(offset.?), }; } else .{}; diff --git a/src/Value.zig b/src/Value.zig index b72ee2f78958..9ced6f107473 100644 --- a/src/Value.zig +++ b/src/Value.zig @@ -574,166 +574,37 @@ pub fn writeToPackedMemory( } } -/// Load a Value from the contents of `buffer`. +/// Load a Value from the contents of `buffer`, where `ty` is an unsigned integer type. /// /// Asserts that buffer.len >= ty.abiSize(). 
The buffer is allowed to extend past /// the end of the value in memory. -pub fn readFromMemory( +pub fn readUintFromMemory( ty: Type, pt: Zcu.PerThread, buffer: []const u8, arena: Allocator, -) error{ - IllDefinedMemoryLayout, - Unimplemented, - OutOfMemory, -}!Value { +) Allocator.Error!Value { const zcu = pt.zcu; - const ip = &zcu.intern_pool; - const target = zcu.getTarget(); - const endian = target.cpu.arch.endian(); - switch (ty.zigTypeTag(zcu)) { - .void => return Value.void, - .bool => { - if (buffer[0] == 0) { - return Value.false; - } else { - return Value.true; - } - }, - .int, .@"enum" => |ty_tag| { - const int_ty = switch (ty_tag) { - .int => ty, - .@"enum" => ty.intTagType(zcu), - else => unreachable, - }; - const int_info = int_ty.intInfo(zcu); - const bits = int_info.bits; - const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8); - if (bits == 0 or buffer.len == 0) return zcu.getCoerced(try zcu.intValue(int_ty, 0), ty); + const endian = zcu.getTarget().cpu.arch.endian(); - if (bits <= 64) switch (int_info.signedness) { // Fast path for integers <= u64 - .signed => { - const val = std.mem.readVarInt(i64, buffer[0..byte_count], endian); - const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); - return zcu.getCoerced(try zcu.intValue(int_ty, result), ty); - }, - .unsigned => { - const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian); - const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); - return zcu.getCoerced(try zcu.intValue(int_ty, result), ty); - }, - } else { // Slow path, we have to construct a big-int - const Limb = std.math.big.Limb; - const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb); - const limbs_buffer = try arena.alloc(Limb, limb_count); - - var bigint = BigIntMutable.init(limbs_buffer, 0); - bigint.readTwosComplement(buffer[0..byte_count], bits, endian, int_info.signedness); - return zcu.getCoerced(try zcu.intValue_big(int_ty, bigint.toConst()), ty); - } - }, - .float => return Value.fromInterned(try pt.intern(.{ .float = .{ - .ty = ty.toIntern(), - .storage = switch (ty.floatBits(target)) { - 16 => .{ .f16 = @bitCast(std.mem.readInt(u16, buffer[0..2], endian)) }, - 32 => .{ .f32 = @bitCast(std.mem.readInt(u32, buffer[0..4], endian)) }, - 64 => .{ .f64 = @bitCast(std.mem.readInt(u64, buffer[0..8], endian)) }, - 80 => .{ .f80 = @bitCast(std.mem.readInt(u80, buffer[0..10], endian)) }, - 128 => .{ .f128 = @bitCast(std.mem.readInt(u128, buffer[0..16], endian)) }, - else => unreachable, - }, - } })), - .array => { - const elem_ty = ty.childType(zcu); - const elem_size = elem_ty.abiSize(zcu); - const elems = try arena.alloc(InternPool.Index, @intCast(ty.arrayLen(zcu))); - var offset: usize = 0; - for (elems) |*elem| { - elem.* = (try readFromMemory(elem_ty, zcu, buffer[offset..], arena)).toIntern(); - offset += @intCast(elem_size); - } - return pt.aggregateValue(ty, elems); - }, - .vector => { - // We use byte_count instead of abi_size here, so that any padding bytes - // follow the data bytes, on both big- and little-endian systems. 
- const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8; - return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena); - }, - .@"struct" => { - const struct_type = zcu.typeToStruct(ty).?; - switch (struct_type.layout) { - .auto => unreachable, // Sema is supposed to have emitted a compile error already - .@"extern" => { - const field_types = struct_type.field_types; - const field_vals = try arena.alloc(InternPool.Index, field_types.len); - for (field_vals, 0..) |*field_val, i| { - const field_ty = Type.fromInterned(field_types.get(ip)[i]); - const off: usize = @intCast(ty.structFieldOffset(i, zcu)); - const sz: usize = @intCast(field_ty.abiSize(zcu)); - field_val.* = (try readFromMemory(field_ty, zcu, buffer[off..(off + sz)], arena)).toIntern(); - } - return pt.aggregateValue(ty, field_vals); - }, - .@"packed" => { - const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8; - return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena); - }, - } - }, - .error_set => { - const bits = zcu.errorSetBits(); - const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8); - const int = std.mem.readVarInt(u64, buffer[0..byte_count], endian); - const index = (int << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); - const name = zcu.global_error_set.keys()[@intCast(index)]; + assert(ty.isUnsignedInt(zcu)); + const bits = ty.intInfo(zcu).bits; + const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8); - return Value.fromInterned(try pt.intern(.{ .err = .{ - .ty = ty.toIntern(), - .name = name, - } })); - }, - .@"union" => switch (ty.containerLayout(zcu)) { - .auto => return error.IllDefinedMemoryLayout, - .@"extern" => { - const union_size = ty.abiSize(zcu); - const array_ty = try zcu.arrayType(.{ .len = union_size, .child = .u8_type }); - const val = (try readFromMemory(array_ty, zcu, buffer, arena)).toIntern(); - return Value.fromInterned(try pt.internUnion(.{ - .ty = ty.toIntern(), - .tag = .none, - .val = val, - })); - }, - .@"packed" => { - const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8; - return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena); - }, - }, - .pointer => { - assert(!ty.isSlice(zcu)); // No well defined layout. 
- const int_val = try readFromMemory(Type.usize, zcu, buffer, arena); - return Value.fromInterned(try pt.intern(.{ .ptr = .{ - .ty = ty.toIntern(), - .base_addr = .int, - .byte_offset = int_val.toUnsignedInt(zcu), - } })); - }, - .optional => { - assert(ty.isPtrLikeOptional(zcu)); - const child_ty = ty.optionalChild(zcu); - const child_val = try readFromMemory(child_ty, zcu, buffer, arena); - return Value.fromInterned(try pt.intern(.{ .opt = .{ - .ty = ty.toIntern(), - .val = switch (child_val.orderAgainstZero(pt)) { - .lt => unreachable, - .eq => .none, - .gt => child_val.toIntern(), - }, - } })); - }, - else => return error.Unimplemented, + assert(buffer.len >= byte_count); + + if (bits <= 64) { + const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian); + const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); + return pt.intValue(ty, result); + } else { + const Limb = std.math.big.Limb; + const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb); + const limbs_buffer = try arena.alloc(Limb, limb_count); + + var bigint: BigIntMutable = .init(limbs_buffer, 0); + bigint.readTwosComplement(buffer[0..byte_count], bits, endian, .unsigned); + return pt.intValue_big(ty, bigint.toConst()); } } diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index 20aaa3d3c258..41b5a32f6e25 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -3512,7 +3512,6 @@ pub fn ptrType(pt: Zcu.PerThread, info: InternPool.Key.PtrType) Allocator.Error! canon_info.packed_offset.host_size = 0; } }, - .runtime => {}, _ => assert(@intFromEnum(info.flags.vector_index) < info.packed_offset.host_size), } @@ -3663,21 +3662,40 @@ pub fn intRef(pt: Zcu.PerThread, ty: Type, x: anytype) Allocator.Error!Air.Inst. } pub fn intValue_big(pt: Zcu.PerThread, ty: Type, x: BigIntConst) Allocator.Error!Value { - return Value.fromInterned(try pt.intern(.{ .int = .{ + if (ty.toIntern() != .comptime_int_type) { + const int_info = ty.intInfo(pt.zcu); + assert(x.fitsInTwosComp(int_info.signedness, int_info.bits)); + } + return .fromInterned(try pt.intern(.{ .int = .{ .ty = ty.toIntern(), .storage = .{ .big_int = x }, } })); } pub fn intValue_u64(pt: Zcu.PerThread, ty: Type, x: u64) Allocator.Error!Value { - return Value.fromInterned(try pt.intern(.{ .int = .{ + if (ty.toIntern() != .comptime_int_type and x != 0) { + const int_info = ty.intInfo(pt.zcu); + const unsigned_bits = int_info.bits - @intFromBool(int_info.signedness == .signed); + assert(unsigned_bits >= std.math.log2(x) + 1); + } + return .fromInterned(try pt.intern(.{ .int = .{ .ty = ty.toIntern(), .storage = .{ .u64 = x }, } })); } pub fn intValue_i64(pt: Zcu.PerThread, ty: Type, x: i64) Allocator.Error!Value { - return Value.fromInterned(try pt.intern(.{ .int = .{ + if (ty.toIntern() != .comptime_int_type and x != 0) { + const int_info = ty.intInfo(pt.zcu); + const unsigned_bits = int_info.bits - @intFromBool(int_info.signedness == .signed); + if (x > 0) { + assert(unsigned_bits >= std.math.log2(x) + 1); + } else { + assert(int_info.signedness == .signed); + assert(unsigned_bits >= std.math.log2_int_ceil(u64, @abs(x))); + } + } + return .fromInterned(try pt.intern(.{ .int = .{ .ty = ty.toIntern(), .storage = .{ .i64 = x }, } })); diff --git a/src/codegen/aarch64/Select.zig b/src/codegen/aarch64/Select.zig index 4fe798271fd9..36ca69e589ee 100644 --- a/src/codegen/aarch64/Select.zig +++ b/src/codegen/aarch64/Select.zig @@ -826,18 +826,6 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void { try 
isel.analyzeUse(un_op); - air_body_index += 1; - air_inst_index = air_body[air_body_index]; - continue :air_tag air_tags[@intFromEnum(air_inst_index)]; - }, - .vector_store_elem => { - const vector_store_elem = air_data[@intFromEnum(air_inst_index)].vector_store_elem; - const bin_op = isel.air.extraData(Air.Bin, vector_store_elem.payload).data; - - try isel.analyzeUse(vector_store_elem.vector_ptr); - try isel.analyzeUse(bin_op.lhs); - try isel.analyzeUse(bin_op.rhs); - air_body_index += 1; air_inst_index = air_body[air_body_index]; continue :air_tag air_tags[@intFromEnum(air_inst_index)]; diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 7341a9fd0bdf..0abea3d50396 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -37,6 +37,7 @@ pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features { .expand_packed_load = true, .expand_packed_store = true, .expand_packed_struct_field_val = true, + .expand_packed_aggregate_init = true, }), }; } @@ -1392,114 +1393,21 @@ pub const DeclGen = struct { try w.writeByte('}'); }, .@"packed" => { - const int_info = ty.intInfo(zcu); - - const bits = Type.smallestUnsignedBits(int_info.bits - 1); - const bit_offset_ty = try pt.intType(.unsigned, bits); - - var bit_offset: u64 = 0; - var eff_num_fields: usize = 0; - - for (0..loaded_struct.field_types.len) |field_index| { - const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - eff_num_fields += 1; - } - - if (eff_num_fields == 0) { - try w.writeByte('('); - try dg.renderUndefValue(w, ty, location); - try w.writeByte(')'); - } else if (ty.bitSize(zcu) > 64) { - // zig_or_u128(zig_or_u128(zig_shl_u128(a, a_off), zig_shl_u128(b, b_off)), zig_shl_u128(c, c_off)) - var num_or = eff_num_fields - 1; - while (num_or > 0) : (num_or -= 1) { - try w.writeAll("zig_or_"); - try dg.renderTypeForBuiltinFnName(w, ty); - try w.writeByte('('); - } - - var eff_index: usize = 0; - var needs_closing_paren = false; - for (0..loaded_struct.field_types.len) |field_index| { - const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - - const field_val = switch (ip.indexToKey(val.toIntern()).aggregate.storage) { - .bytes => |bytes| try pt.intern(.{ .int = .{ - .ty = field_ty.toIntern(), - .storage = .{ .u64 = bytes.at(field_index, ip) }, - } }), - .elems => |elems| elems[field_index], - .repeated_elem => |elem| elem, - }; - const cast_context = IntCastContext{ .value = .{ .value = Value.fromInterned(field_val) } }; - if (bit_offset != 0) { - try w.writeAll("zig_shl_"); - try dg.renderTypeForBuiltinFnName(w, ty); - try w.writeByte('('); - try dg.renderIntCast(w, ty, cast_context, field_ty, .FunctionArgument); - try w.writeAll(", "); - try dg.renderValue(w, try pt.intValue(bit_offset_ty, bit_offset), .FunctionArgument); - try w.writeByte(')'); - } else { - try dg.renderIntCast(w, ty, cast_context, field_ty, .FunctionArgument); - } - - if (needs_closing_paren) try w.writeByte(')'); - if (eff_index != eff_num_fields - 1) try w.writeAll(", "); - - bit_offset += field_ty.bitSize(zcu); - needs_closing_paren = true; - eff_index += 1; - } - } else { - try w.writeByte('('); - // a << a_off | b << b_off | c << c_off - var empty = true; - for (0..loaded_struct.field_types.len) |field_index| { - const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - 
- if (!empty) try w.writeAll(" | "); - try w.writeByte('('); - try dg.renderCType(w, ctype); - try w.writeByte(')'); - - const field_val = switch (ip.indexToKey(val.toIntern()).aggregate.storage) { - .bytes => |bytes| try pt.intern(.{ .int = .{ - .ty = field_ty.toIntern(), - .storage = .{ .u64 = bytes.at(field_index, ip) }, - } }), - .elems => |elems| elems[field_index], - .repeated_elem => |elem| elem, - }; - - const field_int_info: std.builtin.Type.Int = if (field_ty.isAbiInt(zcu)) - field_ty.intInfo(zcu) - else - .{ .signedness = .unsigned, .bits = undefined }; - switch (field_int_info.signedness) { - .signed => { - try w.writeByte('('); - try dg.renderValue(w, Value.fromInterned(field_val), .Other); - try w.writeAll(" & "); - const field_uint_ty = try pt.intType(.unsigned, field_int_info.bits); - try dg.renderValue(w, try field_uint_ty.maxIntScalar(pt, field_uint_ty), .Other); - try w.writeByte(')'); - }, - .unsigned => try dg.renderValue(w, Value.fromInterned(field_val), .Other), - } - if (bit_offset != 0) { - try w.writeAll(" << "); - try dg.renderValue(w, try pt.intValue(bit_offset_ty, bit_offset), .FunctionArgument); - } - - bit_offset += field_ty.bitSize(zcu); - empty = false; - } - try w.writeByte(')'); - } + // https://github.com/ziglang/zig/issues/24657 will eliminate most of the + // following logic, leaving only the recursive `renderValue` call. Once + // that proposal is implemented, a `packed struct` will literally be + // represented in the InternPool by its comptime-known backing integer. + var arena: std.heap.ArenaAllocator = .init(zcu.gpa); + defer arena.deinit(); + const backing_ty: Type = .fromInterned(loaded_struct.backingIntTypeUnordered(ip)); + const buf = try arena.allocator().alloc(u8, @intCast(ty.abiSize(zcu))); + val.writeToMemory(pt, buf) catch |err| switch (err) { + error.IllDefinedMemoryLayout => unreachable, + error.OutOfMemory => |e| return e, + error.ReinterpretDeclRef, error.Unimplemented => return dg.fail("TODO: C backend: lower packed struct value", .{}), + }; + const backing_val: Value = try .readUintFromMemory(backing_ty, pt, buf, arena.allocator()); + return dg.renderValue(w, backing_val, location); }, } }, @@ -1507,33 +1415,38 @@ pub const DeclGen = struct { }, .un => |un| { const loaded_union = ip.loadUnionType(ty.toIntern()); + if (loaded_union.flagsUnordered(ip).layout == .@"packed") { + // https://github.com/ziglang/zig/issues/24657 will eliminate most of the + // following logic, leaving only the recursive `renderValue` call. Once + // that proposal is implemented, a `packed union` will literally be + // represented in the InternPool by its comptime-known backing integer. 
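+ // As a sketch of the strategy (types chosen purely for illustration): a value
+ // of `packed struct { a: u4, b: u12 }` is serialized into its 2 ABI bytes by
+ // `writeToMemory`, read back as its `u16` backing integer via
+ // `readUintFromMemory`, and then rendered as an ordinary integer literal.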
+ var arena: std.heap.ArenaAllocator = .init(zcu.gpa); + defer arena.deinit(); + const backing_ty = try ty.unionBackingType(pt); + const buf = try arena.allocator().alloc(u8, @intCast(ty.abiSize(zcu))); + val.writeToMemory(pt, buf) catch |err| switch (err) { + error.IllDefinedMemoryLayout => unreachable, + error.OutOfMemory => |e| return e, + error.ReinterpretDeclRef, error.Unimplemented => return dg.fail("TODO: C backend: lower packed union value", .{}), + }; + const backing_val: Value = try .readUintFromMemory(backing_ty, pt, buf, arena.allocator()); + return dg.renderValue(w, backing_val, location); + } if (un.tag == .none) { const backing_ty = try ty.unionBackingType(pt); - switch (loaded_union.flagsUnordered(ip).layout) { - .@"packed" => { - if (!location.isInitializer()) { - try w.writeByte('('); - try dg.renderType(w, backing_ty); - try w.writeByte(')'); - } - try dg.renderValue(w, Value.fromInterned(un.val), location); - }, - .@"extern" => { - if (location == .StaticInitializer) { - return dg.fail("TODO: C backend: implement extern union backing type rendering in static initializers", .{}); - } - - const ptr_ty = try pt.singleConstPtrType(ty); - try w.writeAll("*(("); - try dg.renderType(w, ptr_ty); - try w.writeAll(")("); - try dg.renderType(w, backing_ty); - try w.writeAll("){"); - try dg.renderValue(w, Value.fromInterned(un.val), location); - try w.writeAll("})"); - }, - else => unreachable, + assert(loaded_union.flagsUnordered(ip).layout == .@"extern"); + if (location == .StaticInitializer) { + return dg.fail("TODO: C backend: implement extern union backing type rendering in static initializers", .{}); } + + const ptr_ty = try pt.singleConstPtrType(ty); + try w.writeAll("*(("); + try dg.renderType(w, ptr_ty); + try w.writeAll(")("); + try dg.renderType(w, backing_ty); + try w.writeAll("){"); + try dg.renderValue(w, Value.fromInterned(un.val), location); + try w.writeAll("})"); } else { if (!location.isInitializer()) { try w.writeByte('('); @@ -1544,21 +1457,6 @@ pub const DeclGen = struct { const field_index = zcu.unionTagFieldIndex(loaded_union, Value.fromInterned(un.tag)).?; const field_ty: Type = .fromInterned(loaded_union.field_types.get(ip)[field_index]); const field_name = loaded_union.loadTagType(ip).names.get(ip)[field_index]; - if (loaded_union.flagsUnordered(ip).layout == .@"packed") { - if (field_ty.hasRuntimeBits(zcu)) { - if (field_ty.isPtrAtRuntime(zcu)) { - try w.writeByte('('); - try dg.renderCType(w, ctype); - try w.writeByte(')'); - } else if (field_ty.zigTypeTag(zcu) == .float) { - try w.writeByte('('); - try dg.renderCType(w, ctype); - try w.writeByte(')'); - } - try dg.renderValue(w, Value.fromInterned(un.val), location); - } else try w.writeByte('0'); - return; - } const has_tag = loaded_union.hasTag(ip); if (has_tag) try w.writeByte('{'); @@ -1745,9 +1643,11 @@ pub const DeclGen = struct { } return w.writeByte('}'); }, - .@"packed" => return w.print("{f}", .{ - try dg.fmtIntLiteralHex(try pt.undefValue(ty), .Other), - }), + .@"packed" => return dg.renderUndefValue( + w, + .fromInterned(loaded_struct.backingIntTypeUnordered(ip)), + location, + ), } }, .tuple_type => |tuple_info| { @@ -1815,9 +1715,11 @@ pub const DeclGen = struct { } if (has_tag) try w.writeByte('}'); }, - .@"packed" => return w.print("{f}", .{ - try dg.fmtIntLiteralHex(try pt.undefValue(ty), .Other), - }), + .@"packed" => return dg.renderUndefValue( + w, + try ty.unionBackingType(pt), + location, + ), } }, .error_union_type => |error_union_type| switch (ctype.info(ctype_pool)) { @@ 
-2445,10 +2347,7 @@ pub const DeclGen = struct { const ty = val.typeOf(zcu); return .{ .data = .{ .dg = dg, - .int_info = if (ty.zigTypeTag(zcu) == .@"union" and ty.containerLayout(zcu) == .@"packed") - .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) } - else - ty.intInfo(zcu), + .int_info = ty.intInfo(zcu), .kind = kind, .ctype = try dg.ctypeFromType(ty, kind), .val = val, @@ -3656,7 +3555,6 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void { .is_named_enum_value => return f.fail("TODO: C backend: implement is_named_enum_value", .{}), .error_set_has_value => return f.fail("TODO: C backend: implement error_set_has_value", .{}), - .vector_store_elem => return f.fail("TODO: C backend: implement vector_store_elem", .{}), .runtime_nav_ptr => try airRuntimeNavPtr(f, inst), @@ -3956,6 +3854,10 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { const ptr_info = ptr_scalar_ty.ptrInfo(zcu); const src_ty: Type = .fromInterned(ptr_info.child); + // `Air.Legalize.Feature.expand_packed_load` should ensure that the only + // bit-pointers we see here are vector element pointers. + assert(ptr_info.packed_offset.host_size == 0 or ptr_info.flags.vector_index != .none); + if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) { try reap(f, inst, &.{ty_op.operand}); return .none; @@ -3987,40 +3889,6 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { try w.writeAll(", sizeof("); try f.renderType(w, src_ty); try w.writeAll("))"); - } else if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { - const host_bits: u16 = ptr_info.packed_offset.host_size * 8; - const host_ty = try pt.intType(.unsigned, host_bits); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(host_bits - 1)); - const bit_offset_val = try pt.intValue(bit_offset_ty, ptr_info.packed_offset.bit_offset); - - const field_ty = try pt.intType(.unsigned, @as(u16, @intCast(src_ty.bitSize(zcu)))); - - try f.writeCValue(w, local, .Other); - try v.elem(f, w); - try w.writeAll(" = ("); - try f.renderType(w, src_ty); - try w.writeAll(")zig_wrap_"); - try f.object.dg.renderTypeForBuiltinFnName(w, field_ty); - try w.writeAll("(("); - try f.renderType(w, field_ty); - try w.writeByte(')'); - const cant_cast = host_ty.isInt(zcu) and host_ty.bitSize(zcu) > 64; - if (cant_cast) { - if (field_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{}); - try w.writeAll("zig_lo_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - } - try w.writeAll("zig_shr_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - try f.writeCValueDeref(w, operand); - try v.elem(f, w); - try w.print(", {f})", .{try f.fmtIntLiteralDec(bit_offset_val)}); - if (cant_cast) try w.writeByte(')'); - try f.object.dg.renderBuiltinInfo(w, field_ty, .bits); - try w.writeByte(')'); } else { try f.writeCValue(w, local, .Other); try v.elem(f, w); @@ -4213,6 +4081,10 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { const ptr_scalar_ty = ptr_ty.scalarType(zcu); const ptr_info = ptr_scalar_ty.ptrInfo(zcu); + // `Air.Legalize.Feature.expand_packed_store` should ensure that the only + // bit-pointers we see here are vector element pointers. 
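+ // (Illustrative counterexample, not something Sema emits anymore: a store
+ // through a `u3`-in-`u8` packed field pointer, i.e. `host_size > 0` with
+ // `vector_index == .none`, would need the masked read-modify-write sequence
+ // that legalization now expands for us.)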
+ assert(ptr_info.packed_offset.host_size == 0 or ptr_info.flags.vector_index != .none); + const ptr_val = try f.resolveInst(bin_op.lhs); const src_ty = f.typeOf(bin_op.rhs); @@ -4277,66 +4149,6 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { try w.writeByte(';'); try f.object.newline(); try v.end(f, inst, w); - } else if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { - const host_bits = ptr_info.packed_offset.host_size * 8; - const host_ty = try pt.intType(.unsigned, host_bits); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(host_bits - 1)); - const bit_offset_val = try pt.intValue(bit_offset_ty, ptr_info.packed_offset.bit_offset); - - const src_bits = src_ty.bitSize(zcu); - - const ExpectedContents = [BigInt.Managed.default_capacity]BigIntLimb; - var stack align(@alignOf(ExpectedContents)) = - std.heap.stackFallback(@sizeOf(ExpectedContents), f.object.dg.gpa); - - var mask = try BigInt.Managed.initCapacity(stack.get(), BigInt.calcTwosCompLimbCount(host_bits)); - defer mask.deinit(); - - try mask.setTwosCompIntLimit(.max, .unsigned, @intCast(src_bits)); - try mask.shiftLeft(&mask, ptr_info.packed_offset.bit_offset); - try mask.bitNotWrap(&mask, .unsigned, host_bits); - - const mask_val = try pt.intValue_big(host_ty, mask.toConst()); - - const v = try Vectorize.start(f, inst, w, ptr_ty); - const a = try Assignment.start(f, w, src_scalar_ctype); - try f.writeCValueDeref(w, ptr_val); - try v.elem(f, w); - try a.assign(f, w); - try w.writeAll("zig_or_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeAll("(zig_and_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - try f.writeCValueDeref(w, ptr_val); - try v.elem(f, w); - try w.print(", {f}), zig_shl_", .{try f.fmtIntLiteralHex(mask_val)}); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - const cant_cast = host_ty.isInt(zcu) and host_ty.bitSize(zcu) > 64; - if (cant_cast) { - if (src_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{}); - try w.writeAll("zig_make_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeAll("(0, "); - } else { - try w.writeByte('('); - try f.renderType(w, host_ty); - try w.writeByte(')'); - } - - if (src_ty.isPtrAtRuntime(zcu)) { - try w.writeByte('('); - try f.renderType(w, .usize); - try w.writeByte(')'); - } - try f.writeCValue(w, src_val, .Other); - try v.elem(f, w); - if (cant_cast) try w.writeByte(')'); - try w.print(", {f}))", .{try f.fmtIntLiteralDec(bit_offset_val)}); - try a.end(f, w); - try v.end(f, inst, w); } else { switch (ptr_val) { .local_ref => |ptr_local_index| switch (src_val) { @@ -6015,10 +5827,7 @@ fn fieldLocation( else if (!field_ptr_ty.childType(zcu).hasRuntimeBitsIgnoreComptime(zcu)) .{ .byte_offset = loaded_struct.offsets.get(ip)[field_index] } else - .{ .field = if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| - .{ .identifier = field_name.toSlice(ip) } - else - .{ .field = field_index } }, + .{ .field = .{ .identifier = loaded_struct.fieldName(ip, field_index).toSlice(ip) } }, .@"packed" => if (field_ptr_ty.ptrInfo(zcu).packed_offset.host_size == 0) .{ .byte_offset = @divExact(zcu.structPackedFieldBitOffset(loaded_struct, field_index) + container_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset, 8) } @@ -6202,115 +6011,20 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue { // Ensure complete type 
definition is visible before accessing fields. _ = try f.ctypeFromType(struct_ty, .complete); + assert(struct_ty.containerLayout(zcu) != .@"packed"); // `Air.Legalize.Feature.expand_packed_struct_field_val` handles this case const field_name: CValue = switch (ip.indexToKey(struct_ty.toIntern())) { - .struct_type => field_name: { - const loaded_struct = ip.loadStructType(struct_ty.toIntern()); - switch (loaded_struct.layout) { - .auto, .@"extern" => break :field_name if (loaded_struct.fieldName(ip, extra.field_index).unwrap()) |field_name| - .{ .identifier = field_name.toSlice(ip) } - else - .{ .field = extra.field_index }, - .@"packed" => { - const int_info = struct_ty.intInfo(zcu); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(int_info.bits - 1)); - - const bit_offset = zcu.structPackedFieldBitOffset(loaded_struct, extra.field_index); - - const field_int_signedness = if (inst_ty.isAbiInt(zcu)) - inst_ty.intInfo(zcu).signedness - else - .unsigned; - const field_int_ty = try pt.intType(field_int_signedness, @as(u16, @intCast(inst_ty.bitSize(zcu)))); - - const temp_local = try f.allocLocal(inst, field_int_ty); - try f.writeCValue(w, temp_local, .Other); - try w.writeAll(" = zig_wrap_"); - try f.object.dg.renderTypeForBuiltinFnName(w, field_int_ty); - try w.writeAll("(("); - try f.renderType(w, field_int_ty); - try w.writeByte(')'); - const cant_cast = int_info.bits > 64; - if (cant_cast) { - if (field_int_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{}); - try w.writeAll("zig_lo_"); - try f.object.dg.renderTypeForBuiltinFnName(w, struct_ty); - try w.writeByte('('); - } - if (bit_offset > 0) { - try w.writeAll("zig_shr_"); - try f.object.dg.renderTypeForBuiltinFnName(w, struct_ty); - try w.writeByte('('); - } - try f.writeCValue(w, struct_byval, .Other); - if (bit_offset > 0) try w.print(", {f})", .{ - try f.fmtIntLiteralDec(try pt.intValue(bit_offset_ty, bit_offset)), - }); - if (cant_cast) try w.writeByte(')'); - try f.object.dg.renderBuiltinInfo(w, field_int_ty, .bits); - try w.writeAll(");"); - try f.object.newline(); - if (inst_ty.eql(field_int_ty, zcu)) return temp_local; - - const local = try f.allocLocal(inst, inst_ty); - if (local.new_local != temp_local.new_local) { - try w.writeAll("memcpy("); - try f.writeCValue(w, .{ .local_ref = local.new_local }, .FunctionArgument); - try w.writeAll(", "); - try f.writeCValue(w, .{ .local_ref = temp_local.new_local }, .FunctionArgument); - try w.writeAll(", sizeof("); - try f.renderType(w, inst_ty); - try w.writeAll("));"); - try f.object.newline(); - } - try freeLocal(f, inst, temp_local.new_local, null); - return local; - }, + .struct_type => .{ .identifier = struct_ty.structFieldName(extra.field_index, zcu).unwrap().?.toSlice(ip) }, + .union_type => name: { + const union_type = ip.loadUnionType(struct_ty.toIntern()); + const enum_tag_ty: Type = .fromInterned(union_type.enum_tag_ty); + const field_name_str = enum_tag_ty.enumFieldName(extra.field_index, zcu).toSlice(ip); + if (union_type.hasTag(ip)) { + break :name .{ .payload_identifier = field_name_str }; + } else { + break :name .{ .identifier = field_name_str }; } }, .tuple_type => .{ .field = extra.field_index }, - .union_type => field_name: { - const loaded_union = ip.loadUnionType(struct_ty.toIntern()); - switch (loaded_union.flagsUnordered(ip).layout) { - .auto, .@"extern" => { - const name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index]; - break :field_name if 
(loaded_union.hasTag(ip)) - .{ .payload_identifier = name.toSlice(ip) } - else - .{ .identifier = name.toSlice(ip) }; - }, - .@"packed" => { - const operand_lval = if (struct_byval == .constant) blk: { - const operand_local = try f.allocLocal(inst, struct_ty); - try f.writeCValue(w, operand_local, .Other); - try w.writeAll(" = "); - try f.writeCValue(w, struct_byval, .Other); - try w.writeByte(';'); - try f.object.newline(); - break :blk operand_local; - } else struct_byval; - const local = try f.allocLocal(inst, inst_ty); - if (switch (local) { - .new_local, .local => |local_index| switch (operand_lval) { - .new_local, .local => |operand_local_index| local_index != operand_local_index, - else => true, - }, - else => true, - }) { - try w.writeAll("memcpy(&"); - try f.writeCValue(w, local, .Other); - try w.writeAll(", &"); - try f.writeCValue(w, operand_lval, .Other); - try w.writeAll(", sizeof("); - try f.renderType(w, inst_ty); - try w.writeAll("));"); - try f.object.newline(); - } - try f.freeCValue(inst, operand_lval); - return local; - }, - } - }, else => unreachable, }; @@ -7702,98 +7416,13 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; const a = try Assignment.start(f, w, try f.ctypeFromType(field_ty, .complete)); - try f.writeCValueMember(w, local, if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| - .{ .identifier = field_name.toSlice(ip) } - else - .{ .field = field_index }); + try f.writeCValueMember(w, local, .{ .identifier = loaded_struct.fieldName(ip, field_index).toSlice(ip) }); try a.assign(f, w); try f.writeCValue(w, resolved_elements[field_index], .Other); try a.end(f, w); } }, - .@"packed" => { - try f.writeCValue(w, local, .Other); - try w.writeAll(" = "); - - const backing_int_ty: Type = .fromInterned(loaded_struct.backingIntTypeUnordered(ip)); - const int_info = backing_int_ty.intInfo(zcu); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(int_info.bits - 1)); - - var bit_offset: u64 = 0; - - var empty = true; - for (0..elements.len) |field_index| { - if (inst_ty.structFieldIsComptime(field_index, zcu)) continue; - const field_ty = inst_ty.fieldType(field_index, zcu); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - - if (!empty) { - try w.writeAll("zig_or_"); - try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty); - try w.writeByte('('); - } - empty = false; - } - empty = true; - for (resolved_elements, 0..) |element, field_index| { - if (inst_ty.structFieldIsComptime(field_index, zcu)) continue; - const field_ty = inst_ty.fieldType(field_index, zcu); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - - if (!empty) try w.writeAll(", "); - // TODO: Skip this entire shift if val is 0? 
-                        try w.writeAll("zig_shlw_");
-                        try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty);
-                        try w.writeByte('(');
-
-                        if (field_ty.isAbiInt(zcu)) {
-                            try w.writeAll("zig_and_");
-                            try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty);
-                            try w.writeByte('(');
-                        }
-
-                        if (inst_ty.isAbiInt(zcu) and (field_ty.isAbiInt(zcu) or field_ty.isPtrAtRuntime(zcu))) {
-                            try f.renderIntCast(w, inst_ty, element, .{}, field_ty, .FunctionArgument);
-                        } else {
-                            try w.writeByte('(');
-                            try f.renderType(w, inst_ty);
-                            try w.writeByte(')');
-                            if (field_ty.isPtrAtRuntime(zcu)) {
-                                try w.writeByte('(');
-                                try f.renderType(w, switch (int_info.signedness) {
-                                    .unsigned => .usize,
-                                    .signed => .isize,
-                                });
-                                try w.writeByte(')');
-                            }
-                            try f.writeCValue(w, element, .Other);
-                        }
-
-                        if (field_ty.isAbiInt(zcu)) {
-                            try w.writeAll(", ");
-                            const field_int_info = field_ty.intInfo(zcu);
-                            const field_mask = if (int_info.signedness == .signed and int_info.bits == field_int_info.bits)
-                                try pt.intValue(backing_int_ty, -1)
-                            else
-                                try (try pt.intType(.unsigned, field_int_info.bits)).maxIntScalar(pt, backing_int_ty);
-                            try f.object.dg.renderValue(w, field_mask, .FunctionArgument);
-                            try w.writeByte(')');
-                        }
-
-                        try w.print(", {f}", .{
-                            try f.fmtIntLiteralDec(try pt.intValue(bit_offset_ty, bit_offset)),
-                        });
-                        try f.object.dg.renderBuiltinInfo(w, inst_ty, .bits);
-                        try w.writeByte(')');
-                        if (!empty) try w.writeByte(')');
-
-                        bit_offset += field_ty.bitSize(zcu);
-                        empty = false;
-                    }
-                    try w.writeByte(';');
-                    try f.object.newline();
-                },
+                .@"packed" => unreachable, // `Air.Legalize.Feature.expand_packed_aggregate_init` handles this case
             }
         },
         .tuple_type => |tuple_info| for (0..tuple_info.types.len) |field_index| {
@@ -7828,9 +7457,10 @@ fn airUnionInit(f: *Function, inst: Air.Inst.Index) !CValue {
     try reap(f, inst, &.{extra.init});
 
     const w = &f.object.code.writer;
-    const local = try f.allocLocal(inst, union_ty);
     if (loaded_union.flagsUnordered(ip).layout == .@"packed") return f.moveCValue(inst, union_ty, payload);
 
+    const local = try f.allocLocal(inst, union_ty);
+
     const field: CValue = if (union_ty.unionTagTypeSafety(zcu)) |tag_ty| field: {
         const layout = union_ty.unionGetLayout(zcu);
         if (layout.tag_size != 0) {
diff --git a/src/codegen/c/Type.zig b/src/codegen/c/Type.zig
index fa4db36a0c69..ac535a47d158 100644
--- a/src/codegen/c/Type.zig
+++ b/src/codegen/c/Type.zig
@@ -2514,11 +2514,7 @@ pub const Pool = struct {
                     kind.noParameter(),
                 );
                 if (field_ctype.index == .void) continue;
-                const field_name = if (loaded_struct.fieldName(ip, field_index)
-                    .unwrap()) |field_name|
-                    try pool.string(allocator, field_name.toSlice(ip))
-                else
-                    String.fromUnnamed(@intCast(field_index));
+                const field_name = try pool.string(allocator, loaded_struct.fieldName(ip, field_index).toSlice(ip));
                 const field_alignas = AlignAs.fromAlignment(.{
                     .@"align" = loaded_struct.fieldAlign(ip, field_index),
                     .abi = field_type.abiAlignment(zcu),
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 10b90e25b855..1160c2958e6c 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -2411,8 +2411,7 @@ pub const Object = struct {
                 const field_size = field_ty.abiSize(zcu);
                 const field_align = ty.fieldAlignment(field_index, zcu);
                 const field_offset = ty.structFieldOffset(field_index, zcu);
-                const field_name = struct_type.fieldName(ip, field_index).unwrap() orelse
-                    try ip.getOrPutStringFmt(gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls);
+                const field_name = struct_type.fieldName(ip, field_index);
                 fields.appendAssumeCapacity(try
o.builder.debugMemberType( try o.builder.metadataString(field_name.toSlice(ip)), null, // File @@ -5093,8 +5092,6 @@ pub const FuncGen = struct { .wasm_memory_size => try self.airWasmMemorySize(inst), .wasm_memory_grow => try self.airWasmMemoryGrow(inst), - .vector_store_elem => try self.airVectorStoreElem(inst), - .runtime_nav_ptr => try self.airRuntimeNavPtr(inst), .inferred_alloc, .inferred_alloc_comptime => unreachable, @@ -6873,16 +6870,14 @@ pub const FuncGen = struct { const array_llvm_ty = try o.lowerType(pt, array_ty); const elem_ty = array_ty.childType(zcu); if (isByRef(array_ty, zcu)) { - const indices: [2]Builder.Value = .{ - try o.builder.intValue(try o.lowerType(pt, Type.usize), 0), rhs, - }; + const elem_ptr = try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &.{ + try o.builder.intValue(try o.lowerType(pt, Type.usize), 0), + rhs, + }, ""); if (isByRef(elem_ty, zcu)) { - const elem_ptr = try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &indices, ""); const elem_alignment = elem_ty.abiAlignment(zcu).toLlvm(); return self.loadByRef(elem_ptr, elem_ty, elem_alignment, .normal); } else { - const elem_ptr = - try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &indices, ""); return self.loadTruncate(.normal, elem_ty, elem_ptr, .default); } } @@ -8140,33 +8135,6 @@ pub const FuncGen = struct { }, ""); } - fn airVectorStoreElem(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { - const o = self.ng.object; - const pt = self.ng.pt; - const zcu = pt.zcu; - const data = self.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem; - const extra = self.air.extraData(Air.Bin, data.payload).data; - - const vector_ptr = try self.resolveInst(data.vector_ptr); - const vector_ptr_ty = self.typeOf(data.vector_ptr); - const index = try self.resolveInst(extra.lhs); - const operand = try self.resolveInst(extra.rhs); - - self.maybeMarkAllowZeroAccess(vector_ptr_ty.ptrInfo(zcu)); - - // TODO: Emitting a load here is a violation of volatile semantics. Not fixable in general. 
- // https://github.com/ziglang/zig/issues/18652#issuecomment-2452844908 - const access_kind: Builder.MemoryAccessKind = - if (vector_ptr_ty.isVolatilePtr(zcu)) .@"volatile" else .normal; - const elem_llvm_ty = try o.lowerType(pt, vector_ptr_ty.childType(zcu)); - const alignment = vector_ptr_ty.ptrAlignment(zcu).toLlvm(); - const loaded = try self.wip.load(access_kind, elem_llvm_ty, vector_ptr, alignment, ""); - - const new_vector = try self.wip.insertElement(loaded, operand, index, ""); - _ = try self.store(vector_ptr, vector_ptr_ty, new_vector, .none); - return .none; - } - fn airRuntimeNavPtr(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const o = fg.ng.object; const pt = fg.ng.pt; @@ -8303,8 +8271,7 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const inst_ty = self.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(zcu); - - if (scalar_ty.isAnyFloat()) return self.todo("saturating float add", .{}); + assert(scalar_ty.zigTypeTag(zcu) == .int); return self.wip.callIntrinsic( .normal, .none, @@ -8344,8 +8311,7 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const inst_ty = self.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(zcu); - - if (scalar_ty.isAnyFloat()) return self.todo("saturating float sub", .{}); + assert(scalar_ty.zigTypeTag(zcu) == .int); return self.wip.callIntrinsic( .normal, .none, @@ -8385,8 +8351,7 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const inst_ty = self.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(zcu); - - if (scalar_ty.isAnyFloat()) return self.todo("saturating float mul", .{}); + assert(scalar_ty.zigTypeTag(zcu) == .int); return self.wip.callIntrinsic( .normal, .none, @@ -11454,7 +11419,6 @@ pub const FuncGen = struct { const access_kind: Builder.MemoryAccessKind = if (info.flags.is_volatile) .@"volatile" else .normal; - assert(info.flags.vector_index != .runtime); if (info.flags.vector_index != .none) { const index_u32 = try o.builder.intValue(.i32, info.flags.vector_index); const vec_elem_ty = try o.lowerType(pt, elem_ty); @@ -11524,7 +11488,6 @@ pub const FuncGen = struct { const access_kind: Builder.MemoryAccessKind = if (info.flags.is_volatile) .@"volatile" else .normal; - assert(info.flags.vector_index != .runtime); if (info.flags.vector_index != .none) { const index_u32 = try o.builder.intValue(.i32, info.flags.vector_index); const vec_elem_ty = try o.lowerType(pt, elem_ty); diff --git a/src/codegen/riscv64/CodeGen.zig b/src/codegen/riscv64/CodeGen.zig index fe40ba4bbb16..bf5e5b6718a0 100644 --- a/src/codegen/riscv64/CodeGen.zig +++ b/src/codegen/riscv64/CodeGen.zig @@ -1633,7 +1633,6 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .is_named_enum_value => return func.fail("TODO implement is_named_enum_value", .{}), .error_set_has_value => return func.fail("TODO implement error_set_has_value", .{}), - .vector_store_elem => return func.fail("TODO implement vector_store_elem", .{}), .c_va_arg => return func.fail("TODO implement c_va_arg", .{}), .c_va_copy => return func.fail("TODO implement c_va_copy", .{}), diff --git a/src/codegen/sparc64/CodeGen.zig b/src/codegen/sparc64/CodeGen.zig index 48d44e39f973..684bfcfabb01 100644 --- a/src/codegen/sparc64/CodeGen.zig +++ b/src/codegen/sparc64/CodeGen.zig @@ -702,7 +702,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .is_named_enum_value => @panic("TODO implement is_named_enum_value"), .error_set_has_value => @panic("TODO 
implement error_set_has_value"), - .vector_store_elem => @panic("TODO implement vector_store_elem"), .runtime_nav_ptr => @panic("TODO implement runtime_nav_ptr"), .c_va_arg => return self.fail("TODO implement c_va_arg", .{}), diff --git a/src/codegen/spirv/CodeGen.zig b/src/codegen/spirv/CodeGen.zig index c8956a274b11..281504c7d253 100644 --- a/src/codegen/spirv/CodeGen.zig +++ b/src/codegen/spirv/CodeGen.zig @@ -1520,8 +1520,7 @@ fn resolveType(cg: *CodeGen, ty: Type, repr: Repr) Error!Id { const field_ty: Type = .fromInterned(struct_type.field_types.get(ip)[field_index]); if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - const field_name = struct_type.fieldName(ip, field_index).unwrap() orelse - try ip.getOrPutStringFmt(zcu.gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls); + const field_name = struct_type.fieldName(ip, field_index); try member_types.append(try cg.resolveType(field_ty, .indirect)); try member_names.append(field_name.toSlice(ip)); try member_offsets.append(@intCast(ty.structFieldOffset(field_index, zcu))); @@ -2726,8 +2725,6 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) Error!void { .ptr_elem_val => try cg.airPtrElemVal(inst), .array_elem_val => try cg.airArrayElemVal(inst), - .vector_store_elem => return cg.airVectorStoreElem(inst), - .set_union_tag => return cg.airSetUnionTag(inst), .get_union_tag => try cg.airGetUnionTag(inst), .union_init => try cg.airUnionInit(inst), @@ -4446,29 +4443,6 @@ fn airPtrElemVal(cg: *CodeGen, inst: Air.Inst.Index) !?Id { return try cg.load(elem_ty, elem_ptr_id, .{ .is_volatile = ptr_ty.isVolatilePtr(zcu) }); } -fn airVectorStoreElem(cg: *CodeGen, inst: Air.Inst.Index) !void { - const zcu = cg.module.zcu; - const data = cg.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem; - const extra = cg.air.extraData(Air.Bin, data.payload).data; - - const vector_ptr_ty = cg.typeOf(data.vector_ptr); - const vector_ty = vector_ptr_ty.childType(zcu); - const scalar_ty = vector_ty.scalarType(zcu); - - const scalar_ty_id = try cg.resolveType(scalar_ty, .indirect); - const storage_class = cg.module.storageClass(vector_ptr_ty.ptrAddressSpace(zcu)); - const scalar_ptr_ty_id = try cg.module.ptrType(scalar_ty_id, storage_class); - - const vector_ptr = try cg.resolve(data.vector_ptr); - const index = try cg.resolve(extra.lhs); - const operand = try cg.resolve(extra.rhs); - - const elem_ptr_id = try cg.accessChainId(scalar_ptr_ty_id, vector_ptr, &.{index}); - try cg.store(scalar_ty, elem_ptr_id, operand, .{ - .is_volatile = vector_ptr_ty.isVolatilePtr(zcu), - }); -} - fn airSetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) !void { const zcu = cg.module.zcu; const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig index d8d8933cc3ea..b7f7aa151daf 100644 --- a/src/codegen/wasm/CodeGen.zig +++ b/src/codegen/wasm/CodeGen.zig @@ -1978,7 +1978,6 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { .save_err_return_trace_index, .is_named_enum_value, .addrspace_cast, - .vector_store_elem, .c_va_arg, .c_va_copy, .c_va_end, diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig index 64340798006d..8e54a3a8c0e4 100644 --- a/src/codegen/x86_64/CodeGen.zig +++ b/src/codegen/x86_64/CodeGen.zig @@ -854,12 +854,6 @@ const FrameAlloc = struct { } }; -const StackAllocation = struct { - inst: ?Air.Inst.Index, - /// TODO do we need size? 
should be determined by inst.ty.abiSize(zcu) - size: u32, -}; - const BlockData = struct { relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty, state: State, @@ -89326,7 +89320,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { error.SelectFailed => res[0] = try ops[0].load(val_ty, .{ .disp = switch (cg.typeOf(ty_op.operand).ptrInfo(zcu).flags.vector_index) { .none => 0, - .runtime => unreachable, else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), }, }, cg), @@ -89569,7 +89562,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { error.SelectFailed => try ops[0].store(&ops[1], .{ .disp = switch (cg.typeOf(bin_op.lhs).ptrInfo(zcu).flags.vector_index) { .none => 0, - .runtime => unreachable, else => |vector_index| @intCast(cg.typeOf(bin_op.rhs).abiSize(zcu) * @intFromEnum(vector_index)), }, .safe = switch (air_tag) { @@ -171402,8 +171394,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .aggregate_init => |air_tag| fallback: { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const agg_ty = ty_pl.ty.toType(); - if ((agg_ty.isVector(zcu) and agg_ty.childType(zcu).toIntern() == .bool_type) or - (agg_ty.zigTypeTag(zcu) == .@"struct" and agg_ty.containerLayout(zcu) == .@"packed")) break :fallback try cg.airAggregateInit(inst); + if (agg_ty.isVector(zcu) and agg_ty.childType(zcu).toIntern() == .bool_type) { + break :fallback try cg.airAggregateInitBoolVec(inst); + } var res = try cg.tempAllocMem(agg_ty); const reset_index = cg.next_temp_index; var bt = cg.liveness.iterateBigTomb(inst); @@ -171441,10 +171434,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } } }, - .@"packed" => return cg.fail("failed to select {s} {f}", .{ - @tagName(air_tag), - agg_ty.fmt(pt), - }), + .@"packed" => unreachable, } }, .tuple_type => |tuple_type| { @@ -173054,633 +173044,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ert.die(cg); try res.finish(inst, &.{}, &.{}, cg); }, - .vector_store_elem => { - const vector_store_elem = air_datas[@intFromEnum(inst)].vector_store_elem; - const bin_op = cg.air.extraData(Air.Bin, vector_store_elem.payload).data; - var ops = try cg.tempsFromOperands(inst, .{ vector_store_elem.vector_ptr, bin_op.lhs, bin_op.rhs }); - cg.select(&.{}, &.{}, &ops, comptime &.{ .{ - .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } }, - }, - .extra_temps = .{ - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, - .{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ }, - .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } }, - }, - .extra_temps = .{ - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, - .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, - .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, null, null, null }, 
- .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .extra_temps = .{ - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ }, - .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ }, - .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ }, - .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .extra_temps = .{ - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ }, - .{ ._, ._mp, .j, .@"1f", ._, ._, ._ }, - .{ .@"0:", ._s, .bt, .tmp0d, .src1d, ._, ._ }, - .{ .@"1:", ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } }, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } }, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._s, .bt, .lea(.src0d), .src1d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, null, null, null }, - .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .movzx, .tmp0d, .lea(.src0w), ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ }, - .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ }, - .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ }, - .{ ._, ._, .mov, .lea(.src0w), .tmp0w, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ }, - .{ ._, ._mp, .j, .@"0f", ._, ._, ._ }, - .{ .@"1:", ._s, .bt, .lea(.src0w), .src1w, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } }, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._r, .bt, 
.lea(.src0d), .src1d, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } }, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._s, .bt, .lea(.src0d), .src1d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, null, null, null }, - .src_constraints = .{ .{ .ptr_bool_vec = .dword }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0d, .lea(.src0d), ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ }, - .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ }, - .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ }, - .{ ._, ._, .mov, .lea(.src0d), .tmp0d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .cmov, null, null }, - .src_constraints = .{ .{ .ptr_bool_vec = .qword }, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .extra_temps = .{ - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0q, .lea(.src0q), ._, ._ }, - .{ ._, ._, .mov, .tmp1q, .tmp0q, ._, ._ }, - .{ ._, ._r, .bt, .tmp1q, .src1q, ._, ._ }, - .{ ._, ._s, .bt, .tmp0q, .src1q, ._, ._ }, - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._z, .cmov, .tmp0q, .tmp1q, ._, ._ }, - .{ ._, ._, .mov, .lea(.src0q), .tmp0q, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, null, null, null }, - .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0d, .src1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp0d, .ui(5), ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .leasi(.src0d, .@"4", .tmp0), ._, ._ }, - .{ ._, ._, .mov, .tmp2d, .tmp1d, ._, ._ }, - .{ ._, ._r, .bt, .tmp2d, .src1d, ._, ._ }, - .{ ._, ._s, .bt, .tmp1d, .src1d, ._, ._ }, - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._z, .cmov, .tmp1d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .leasi(.src0d, .@"4", .tmp0), .tmp1d, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, - .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._r, .bt, .lea(.src0d), .src1d, ._, ._ }, - .{ ._, ._mp, .j, .@"0f", ._, ._, ._ }, - .{ .@"1:", ._s, .bt, .lea(.src0d), .src1d, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .any, .any, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .imm8 } }, - .{ .src = 
.{ .to_gpr, .simm32, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leaa(.src0b, .add_src0_elem_size_mul_src1), .src2b, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .any, .any, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .imm8 } }, - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leai(.src0b, .src1), .src2b, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .any, .any, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .imm16 } }, - .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2w, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .any, .any, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .imm16 } }, - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .src2w, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .vp_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ }, - } }, - }, .{ - .required_features = .{ .sse4_1, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .p_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .extra_temps = .{ - .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .each = .{ .once = &.{ - .{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ }, - .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp0w, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .extra_temps = .{ - .{ .type = .f32, .kind = .mem }, - .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .each = .{ .once = &.{ - .{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ }, - .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp1w, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .vp_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ }, - } }, - }, .{ - .required_features = .{ .sse4_1, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .p_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, 
.to_sse } }, - }, - .extra_temps = .{ - .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .each = .{ .once = &.{ - .{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ }, - .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp0w, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .word } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .extra_temps = .{ - .{ .type = .f32, .kind = .mem }, - .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .each = .{ .once = &.{ - .{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ }, - .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp1w, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .any, .any, .{ .int = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .imm32 } }, - .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2d, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .any, .any, .{ .int = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .imm32 } }, - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leasi(.src0d, .@"4", .src1), .src2d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .v_ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, ._ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .v_ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, ._ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", null, null, null }, - .src_constraints = .{ .any, .any, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .simm32 } }, - .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2q, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", null, null, null }, - .src_constraints = .{ .any, .any, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .simm32 } }, - .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, - }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .leasi(.src0q, .@"8", .src1), .src2q, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .qword } }, - 
.patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .v_sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, ._sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .simm32, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, ._ps, .movl, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, .v_sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, ._sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .any, .any, .{ .float = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, - }, - .each = .{ .once = &.{ - .{ ._, ._ps, .movl, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, - } }, - } }) catch |err| switch (err) { - error.SelectFailed => { - const elem_size = cg.typeOf(bin_op.rhs).abiSize(zcu); - while (try ops[0].toRegClass(true, .general_purpose, cg) or - try ops[1].toRegClass(true, .general_purpose, cg)) - {} - const base_reg = ops[0].tracking(cg).short.register.to64(); - const rhs_reg = ops[1].tracking(cg).short.register.to64(); - if (!std.math.isPowerOfTwo(elem_size)) { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegisterImmediate( - .{ .i_, .mul }, - rhs_reg, - rhs_reg, - .u(elem_size), - ); - try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ - .base = .{ .reg = base_reg }, - .mod = .{ .rm = .{ .index = rhs_reg } }, - }); - } else if (elem_size > 8) { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate( - .{ ._l, .sh }, - rhs_reg, - .u(std.math.log2_int(u64, elem_size)), - ); - try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ - .base = .{ .reg = base_reg }, - .mod = .{ .rm = .{ .index = rhs_reg } }, - }); - } else try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ - .base = .{ .reg = base_reg }, - .mod = .{ .rm = .{ - .index = rhs_reg, - .scale = .fromFactor(@intCast(elem_size)), - } }, - }); - try ops[0].store(&ops[2], .{}, cg); - }, - else => |e| return e, - }; - for (ops) |op| try op.die(cg); - }, .runtime_nav_ptr => { const ty_nav = air_datas[@intFromEnum(inst)].ty_nav; const nav = ip.getNav(ty_nav.nav); @@ -180646,944 +180009,55 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs }); } -fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { +fn airAggregateInitBoolVec(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; + const result_ty = self.typeOfIndex(inst); + const len: usize = @intCast(result_ty.arrayLen(zcu)); const ty_pl = 
self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data; - - const dst_ty = self.typeOfIndex(inst); - const elem_ty = dst_ty.childType(zcu); - const elem_abi_size: u16 = @intCast(elem_ty.abiSize(zcu)); - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - const lhs_ty = self.typeOf(extra.a); - const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); - const rhs_ty = self.typeOf(extra.b); - const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu)); - const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size); - - const ExpectedContents = [32]?i32; - var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = - std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); - const allocator = stack.get(); - - const mask_elems = try allocator.alloc(?i32, extra.mask_len); - defer allocator.free(mask_elems); - for (mask_elems, 0..) |*mask_elem, elem_index| { - const mask_elem_val = - Value.fromInterned(extra.mask).elemValue(pt, elem_index) catch unreachable; - mask_elem.* = if (mask_elem_val.isUndef(zcu)) - null - else - @intCast(mask_elem_val.toSignedInt(zcu)); - } - - const has_avx = self.hasFeature(.avx); - const result = @as(?MCValue, result: { - for (mask_elems) |mask_elem| { - if (mask_elem) |_| break; - } else break :result try self.allocRegOrMem(inst, true); - - for (mask_elems, 0..) |mask_elem, elem_index| { - if (mask_elem orelse continue != elem_index) break; - } else { - const lhs_mcv = try self.resolveInst(extra.a); - if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv; - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{}); - break :result dst_mcv; - } - - for (mask_elems, 0..) |mask_elem, elem_index| { - if (~(mask_elem orelse continue) != elem_index) break; - } else { - const rhs_mcv = try self.resolveInst(extra.b); - if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv; - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{}); - break :result dst_mcv; - } - - for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: { - if (elem_abi_size > 8) break :unpck; - if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :unpck; - - var sources: [2]?u1 = @splat(null); - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index = - std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck; - const elem_byte = (elem_index >> 1) * elem_abi_size; - if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) { - .unpckl => 0b0000, - .unpckh => 0b1000, - else => unreachable, - }) | (elem_byte << 1 & 0b10000)) break :unpck; - - const source = @intFromBool(mask_elem < 0); - if (sources[elem_index & 0b00001]) |prev_source| { - if (source != prev_source) break :unpck; - } else sources[elem_index & 0b00001] = source; - } - if (sources[0] orelse break :unpck == sources[1] orelse break :unpck) break :unpck; - - const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; - const operand_tys = [2]Type{ lhs_ty, rhs_ty }; - const lhs_mcv = try self.resolveInst(operands[sources[0].?]); - const rhs_mcv = try self.resolveInst(operands[sources[1].?]); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, max_abi_size); - - const mir_tag: Mir.Inst.FixedTag = if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or - (dst_abi_size > 16 and !self.hasFeature(.avx2))) .{ switch (elem_abi_size) { - 4 => if (has_avx) .v_ps else ._ps, - 8 => if (has_avx) .v_pd else ._pd, - else => unreachable, - }, variant } else .{ if (has_avx) .vp_ else .p_, switch (variant) { - .unpckl => switch (elem_abi_size) { - 1 => .unpcklbw, - 2 => .unpcklwd, - 4 => .unpckldq, - 8 => .unpcklqdq, - else => unreachable, - }, - .unpckh => switch (elem_abi_size) { - 1 => .unpckhbw, - 2 => .unpckhwd, - 4 => .unpckhdq, - 8 => .unpckhqdq, - else => unreachable, - }, - else => unreachable, - } }; - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemory( - mir_tag, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - ); - break :result dst_mcv; - } - - pshufd: { - if (elem_abi_size != 4) break :pshufd; - if (max_abi_size > self.vectorSize(.float)) break :pshufd; - - var control: u8 = 0b00_00_00_00; - var sources: [1]?u1 = @splat(null); - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); - if (mask_elem_index & 0b100 != elem_index & 0b100) break :pshufd; - - const source = @intFromBool(mask_elem < 0); - if (sources[0]) |prev_source| { - if (source != prev_source) break :pshufd; - } else sources[(elem_index & 0b010) >> 1] = source; - - const select_bit: u3 = @intCast((elem_index & 0b011) << 1); - const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; - if (elem_index & 0b100 == 0) - control |= select_mask - else if (control & @as(u8, 0b11) << select_bit != select_mask) break :pshufd; - } - - const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; - const operand_tys = [2]Type{ lhs_ty, rhs_ty }; - const src_mcv = try self.resolveInst(operands[sources[0] orelse break :pshufd]); - - const dst_reg = if (src_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, src_mcv)) - src_mcv.getReg().? - else - try self.register_manager.allocReg(inst, abi.RegisterClass.sse); - const dst_alias = registerAlias(dst_reg, max_abi_size); - - if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ if (has_avx) .vp_d else .p_d, .shuf }, - dst_alias, - try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterImmediate( - .{ if (has_avx) .vp_d else .p_d, .shuf }, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size), - .u(control), - ); - break :result .{ .register = dst_reg }; - } - - shufps: { - if (elem_abi_size != 4) break :shufps; - if (max_abi_size > self.vectorSize(.float)) break :shufps; - - var control: u8 = 0b00_00_00_00; - var sources: [2]?u1 = @splat(null); - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); - if (mask_elem_index & 0b100 != elem_index & 0b100) break :shufps; - - const source = @intFromBool(mask_elem < 0); - if (sources[(elem_index & 0b010) >> 1]) |prev_source| { - if (source != prev_source) break :shufps; - } else sources[(elem_index & 0b010) >> 1] = source; - - const select_bit: u3 = @intCast((elem_index & 0b011) << 1); - const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; - if (elem_index & 0b100 == 0) - control |= select_mask - else if (control & @as(u8, 0b11) << select_bit != select_mask) break :shufps; - } - if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps; - - const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; - const operand_tys = [2]Type{ lhs_ty, rhs_ty }; - const lhs_mcv = try self.resolveInst(operands[sources[0].?]); - const rhs_mcv = try self.resolveInst(operands[sources[1].?]); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, max_abi_size); - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .v_ps, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .v_ps, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ ._ps, .shuf }, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterImmediate( - .{ ._ps, .shuf }, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ); - break :result dst_mcv; - } - - shufpd: { - if (elem_abi_size != 8) break :shufpd; - if (max_abi_size > self.vectorSize(.float)) break :shufpd; - - var control: u4 = 0b0_0_0_0; - var sources: [2]?u1 = @splat(null); - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); - if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd; - - const source = @intFromBool(mask_elem < 0); - if (sources[elem_index & 0b01]) |prev_source| { - if (source != prev_source) break :shufpd; - } else sources[elem_index & 0b01] = source; - - control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index); - } - if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd; - - const operands: [2]Air.Inst.Ref = .{ extra.a, extra.b }; - const operand_tys: [2]Type = .{ lhs_ty, rhs_ty }; - const lhs_mcv = try self.resolveInst(operands[sources[0].?]); - const rhs_mcv = try self.resolveInst(operands[sources[1].?]); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, max_abi_size); - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .v_pd, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .v_pd, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ ._pd, .shuf }, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterImmediate( - .{ ._pd, .shuf }, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ); - break :result dst_mcv; - } - - blend: { - if (elem_abi_size < 2) break :blend; - if (dst_abi_size > self.vectorSize(.float)) break :blend; - if (!self.hasFeature(.sse4_1)) break :blend; - - var control: u8 = 0b0_0_0_0_0_0_0_0; - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index = - std.math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend; - if (mask_elem_index != elem_index) break :blend; - - const select_mask = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index); - if (elem_index & 0b1000 == 0) - control |= select_mask - else if (control & @as(u8, 0b1) << @truncate(elem_index) != select_mask) break :blend; - } - - if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: { - const expanded_control = switch (elem_abi_size) { - 4 => control, - 8 => @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) | - @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00), - else => break :vpblendd, - }; - - const lhs_mcv = try self.resolveInst(extra.a); - const lhs_reg = if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, lhs_mcv); - const lhs_lock = self.register_manager.lockReg(lhs_reg); - defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - - const rhs_mcv = try self.resolveInst(extra.b); - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse); - if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_d, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(lhs_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .vp_d, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(lhs_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ); - break :result .{ .register = dst_reg }; - } - - if (!elem_ty.isRuntimeFloat() or elem_abi_size == 2) pblendw: { - const expanded_control = switch (elem_abi_size) { - 2 => control, - 4 => if (dst_abi_size <= 16 or - @as(u4, @intCast(control >> 4)) == @as(u4, @truncate(control >> 0))) - @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) | - @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00) - else - break :pblendw, - 8 => if (dst_abi_size <= 16 or - @as(u2, @intCast(control >> 2)) == @as(u2, @truncate(control >> 0))) - @as(u8, if (control & 0b01 != 0) 0b0000_1111 else 0b0000_0000) | - @as(u8, if (control & 0b10 != 0) 0b1111_0000 else 0b0000_0000) - else - break :pblendw, - 16 => break :pblendw, - else => unreachable, - }; - - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, extra.a, 0, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? 
- else - dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .vp_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ .p_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterImmediate( - .{ .p_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ); - break :result .{ .register = dst_reg }; - } - - const expanded_control = switch (elem_abi_size) { - 4, 8 => control, - 16 => @as(u4, if (control & 0b01 != 0) 0b00_11 else 0b00_00) | - @as(u4, if (control & 0b10 != 0) 0b11_00 else 0b00_00), - else => unreachable, - }; - - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, extra.a, 0, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - switch (elem_abi_size) { - 4 => .{ .v_ps, .blend }, - 8, 16 => .{ .v_pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterRegisterImmediate( - switch (elem_abi_size) { - 4 => .{ .v_ps, .blend }, - 8, 16 => .{ .v_pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - switch (elem_abi_size) { - 4 => .{ ._ps, .blend }, - 8, 16 => .{ ._pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterImmediate( - switch (elem_abi_size) { - 4 => .{ ._ps, .blend }, - 8, 16 => .{ ._pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? 
- else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ); - break :result .{ .register = dst_reg }; - } - - blendv: { - if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :blendv; - - const select_mask_elem_ty = try pt.intType(.unsigned, elem_abi_size * 8); - const select_mask_ty = try pt.vectorType(.{ - .len = @intCast(mask_elems.len), - .child = select_mask_elem_ty.toIntern(), - }); - var select_mask_elems: [32]InternPool.Index = undefined; - for ( - select_mask_elems[0..mask_elems.len], - mask_elems, - 0.., - ) |*select_mask_elem, maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index = - std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv; - if (mask_elem_index != elem_index) break :blendv; - - select_mask_elem.* = (if (mask_elem < 0) - try select_mask_elem_ty.maxIntScalar(pt, select_mask_elem_ty) - else - try select_mask_elem_ty.minIntScalar(pt, select_mask_elem_ty)).toIntern(); - } - const select_mask_mcv = try self.lowerValue( - try pt.aggregateValue(select_mask_ty, select_mask_elems[0..mask_elems.len]), - ); - - if (self.hasFeature(.sse4_1)) { - const mir_tag: Mir.Inst.FixedTag = .{ - if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or - (dst_abi_size > 16 and !self.hasFeature(.avx2))) switch (elem_abi_size) { - 4 => if (has_avx) .v_ps else ._ps, - 8 => if (has_avx) .v_pd else ._pd, - else => unreachable, - } else if (has_avx) .vp_b else .p_b, - .blendv, - }; - - const select_mask_reg = if (!has_avx) reg: { - try self.register_manager.getKnownReg(.xmm0, null); - try self.genSetReg(.xmm0, select_mask_elem_ty, select_mask_mcv, .{}); - break :reg .xmm0; - } else try self.copyToTmpRegister(select_mask_ty, select_mask_mcv); - const select_mask_alias = registerAlias(select_mask_reg, dst_abi_size); - const select_mask_lock = self.register_manager.lockRegAssumeUnused(select_mask_reg); - defer self.register_manager.unlockReg(select_mask_lock); - - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, extra.a, 0, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, dst_abi_size); - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister( - mir_tag, - dst_alias, - if (lhs_mcv.isRegister()) - registerAlias(lhs_mcv.getReg().?, dst_abi_size) - else - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - select_mask_alias, - ) else try self.asmRegisterRegisterRegisterRegister( - mir_tag, - dst_alias, - if (lhs_mcv.isRegister()) - registerAlias(lhs_mcv.getReg().?, dst_abi_size) - else - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - select_mask_alias, - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryRegister( - mir_tag, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - select_mask_alias, - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? 
- else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - select_mask_alias, - ); - break :result dst_mcv; - } + const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra.items[ty_pl.payload..][0..len]); - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); + assert(result_ty.zigTypeTag(zcu) == .vector); + assert(result_ty.childType(zcu).toIntern() == .bool_type); - const dst_mcv: MCValue = if (rhs_mcv.isRegister() and - self.reuseOperand(inst, extra.b, 1, rhs_mcv)) - rhs_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, dst_abi_size); + const result_size = result_ty.abiSize(zcu); + if (result_size > 8) return self.fail("TODO airAggregateInitBoolVec over 8 bytes", .{}); - const mask_reg = try self.copyToTmpRegister(select_mask_ty, select_mask_mcv); - const mask_alias = registerAlias(mask_reg, dst_abi_size); - const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg); - defer self.register_manager.unlockReg(mask_lock); + const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const mir_fixes: Mir.Inst.Fixes = if (elem_ty.isRuntimeFloat()) - switch (elem_ty.floatBits(self.target)) { - 16, 80, 128 => .p_, - 32 => ._ps, - 64 => ._pd, - else => unreachable, - } - else - .p_; - try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias); - if (lhs_mcv.isBase()) try self.asmRegisterMemory( - .{ mir_fixes, .andn }, - mask_alias, - try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - ) else try self.asmRegisterRegister( - .{ mir_fixes, .andn }, - mask_alias, - if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, lhs_mcv), - ); - try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias); - break :result dst_mcv; - } + { + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); + try self.asmRegisterRegister( + .{ ._, .xor }, + registerAlias(dst_reg, @min(result_size, 4)), + registerAlias(dst_reg, @min(result_size, 4)), + ); - pshufb: { - if (max_abi_size > 16) break :pshufb; - if (!self.hasFeature(.ssse3)) break :pshufb; - - const temp_regs = - try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse); - const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs); - defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); - - const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size); - try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{}); - - const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size); - try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{}); - - var lhs_mask_elems: [16]InternPool.Index = undefined; - for (lhs_mask_elems[0..max_abi_size], 0..) 
|*lhs_mask_elem, byte_index| { - const elem_index = byte_index / elem_abi_size; - lhs_mask_elem.* = (try pt.intValue(.u8, if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { - const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; - if (mask_elem < 0) break :elem 0b1_00_00000; - const mask_elem_index: u31 = @intCast(mask_elem); - const byte_off: u32 = @intCast(byte_index % elem_abi_size); - break :elem mask_elem_index * elem_abi_size + byte_off; - })).toIntern(); - } - const lhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type }); - const lhs_mask_mcv = try self.lowerValue( - try pt.aggregateValue(lhs_mask_ty, lhs_mask_elems[0..max_abi_size]), - ); - const lhs_mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(.usize, lhs_mask_mcv.address()) }, - .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, - }; - if (has_avx) try self.asmRegisterRegisterMemory( - .{ .vp_b, .shuf }, - lhs_temp_alias, - lhs_temp_alias, - lhs_mask_mem, - ) else try self.asmRegisterMemory( - .{ .p_b, .shuf }, - lhs_temp_alias, - lhs_mask_mem, - ); + for (elements, 0..) |elem, elem_i| { + const elem_reg = try self.copyToTmpRegister(.bool, .{ .air_ref = elem }); + const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg); + defer self.register_manager.unlockReg(elem_lock); - var rhs_mask_elems: [16]InternPool.Index = undefined; - for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| { - const elem_index = byte_index / elem_abi_size; - rhs_mask_elem.* = (try pt.intValue(.u8, if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { - const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; - if (mask_elem >= 0) break :elem 0b1_00_00000; - const mask_elem_index: u31 = @intCast(~mask_elem); - const byte_off: u32 = @intCast(byte_index % elem_abi_size); - break :elem mask_elem_index * elem_abi_size + byte_off; - })).toIntern(); - } - const rhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type }); - const rhs_mask_mcv = try self.lowerValue( - try pt.aggregateValue(rhs_mask_ty, rhs_mask_elems[0..max_abi_size]), + try self.asmRegisterImmediate( + .{ ._, .@"and" }, + registerAlias(elem_reg, @min(result_size, 4)), + .u(1), ); - const rhs_mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(.usize, rhs_mask_mcv.address()) }, - .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, - }; - if (has_avx) try self.asmRegisterRegisterMemory( - .{ .vp_b, .shuf }, - rhs_temp_alias, - rhs_temp_alias, - rhs_mask_mem, - ) else try self.asmRegisterMemory( - .{ .p_b, .shuf }, - rhs_temp_alias, - rhs_mask_mem, + if (elem_i > 0) try self.asmRegisterImmediate( + .{ ._l, .sh }, + registerAlias(elem_reg, @intCast(result_size)), + .u(@intCast(elem_i)), ); - - if (has_avx) try self.asmRegisterRegisterRegister( - .{ switch (elem_ty.zigTypeTag(zcu)) { - else => break :result null, - .int => .vp_, - .float => switch (elem_ty.floatBits(self.target)) { - 32 => .v_ps, - 64 => .v_pd, - 16, 80, 128 => break :result null, - else => unreachable, - }, - }, .@"or" }, - lhs_temp_alias, - lhs_temp_alias, - rhs_temp_alias, - ) else try self.asmRegisterRegister( - .{ switch (elem_ty.zigTypeTag(zcu)) { - else => break :result null, - .int => .p_, - .float => switch (elem_ty.floatBits(self.target)) { - 32 => ._ps, - 64 => ._pd, - 16, 80, 128 => break :result null, - else => unreachable, - }, - }, .@"or" }, - lhs_temp_alias, - rhs_temp_alias, + try self.asmRegisterRegister( + .{ ._, .@"or" }, + 
registerAlias(dst_reg, @intCast(result_size)), + registerAlias(elem_reg, @intCast(result_size)), ); - break :result .{ .register = temp_regs[0] }; } + } - break :result null; - }) orelse return self.fail("TODO implement airShuffle from {f} and {f} to {f} with {f}", .{ - lhs_ty.fmt(pt), - rhs_ty.fmt(pt), - dst_ty.fmt(pt), - Value.fromInterned(extra.mask).fmtValue(pt), - }); - return self.finishAir(inst, result, .{ extra.a, extra.b, .none }); -} - -fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const result_ty = self.typeOfIndex(inst); - const len: usize = @intCast(result_ty.arrayLen(zcu)); - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra.items[ty_pl.payload..][0..len]); - const result: MCValue = result: { - switch (result_ty.zigTypeTag(zcu)) { - .@"struct" => { - if (result_ty.containerLayout(zcu) == .@"packed") return self.fail( - "TODO implement airAggregateInit for {f}", - .{result_ty.fmt(pt)}, - ); - const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); - const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern()); - try self.genInlineMemset( - .{ .lea_frame = .{ .index = frame_index } }, - .{ .immediate = 0 }, - .{ .immediate = result_ty.abiSize(zcu) }, - .{}, - ); - for (elements, 0..) |elem, elem_i_usize| { - const elem_i: u32 = @intCast(elem_i_usize); - if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue; - - const elem_ty = result_ty.fieldType(elem_i, zcu); - const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu)); - if (elem_bit_size > 64) { - return self.fail( - "TODO airAggregateInit implement packed structs with large fields", - .{}, - ); - } - const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); - const elem_abi_bits = elem_abi_size * 8; - const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i); - const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size); - const elem_bit_off = elem_off % elem_abi_bits; - const elem_mcv = try self.resolveInst(elem); - const elem_lock = switch (elem_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - .immediate => |imm| lock: { - if (imm == 0) continue; - break :lock null; - }, - else => null, - }; - defer if (elem_lock) |lock| self.register_manager.unlockReg(lock); - - const elem_extra_bits = self.regExtraBits(elem_ty); - { - const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); - const temp_alias = registerAlias(temp_reg, elem_abi_size); - const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); - defer self.register_manager.unlockReg(temp_lock); - - if (elem_bit_off < elem_extra_bits) { - try self.truncateRegister(elem_ty, temp_alias); - } - if (elem_bit_off > 0) try self.genShiftBinOpMir( - .{ ._l, .sh }, - elem_ty, - .{ .register = temp_alias }, - .u8, - .{ .immediate = elem_bit_off }, - ); - try self.genBinOpMir( - .{ ._, .@"or" }, - elem_ty, - .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, - .{ .register = temp_alias }, - ); - } - if (elem_bit_off > elem_extra_bits) { - const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); - const temp_alias = registerAlias(temp_reg, elem_abi_size); - const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); - defer self.register_manager.unlockReg(temp_lock); - - if (elem_extra_bits > 0) { - try self.truncateRegister(elem_ty, temp_alias); - } - try 
self.genShiftBinOpMir( - .{ ._r, .sh }, - elem_ty, - .{ .register = temp_reg }, - .u8, - .{ .immediate = elem_abi_bits - elem_bit_off }, - ); - try self.genBinOpMir( - .{ ._, .@"or" }, - elem_ty, - .{ .load_frame = .{ - .index = frame_index, - .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)), - } }, - .{ .register = temp_alias }, - ); - } - } - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - .vector => { - const elem_ty = result_ty.childType(zcu); - if (elem_ty.toIntern() != .bool_type) return self.fail( - "TODO implement airAggregateInit for {f}", - .{result_ty.fmt(pt)}, - ); - const result_size: u32 = @intCast(result_ty.abiSize(zcu)); - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - try self.asmRegisterRegister( - .{ ._, .xor }, - registerAlias(dst_reg, @min(result_size, 4)), - registerAlias(dst_reg, @min(result_size, 4)), - ); - - for (elements, 0..) |elem, elem_i| { - const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem }); - const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg); - defer self.register_manager.unlockReg(elem_lock); - - try self.asmRegisterImmediate( - .{ ._, .@"and" }, - registerAlias(elem_reg, @min(result_size, 4)), - .u(1), - ); - if (elem_i > 0) try self.asmRegisterImmediate( - .{ ._l, .sh }, - registerAlias(elem_reg, result_size), - .u(@intCast(elem_i)), - ); - try self.asmRegisterRegister( - .{ ._, .@"or" }, - registerAlias(dst_reg, result_size), - registerAlias(elem_reg, result_size), - ); - } - break :result .{ .register = dst_reg }; - }, - else => unreachable, - } - }; + const result: MCValue = .{ .register = dst_reg }; if (elements.len <= Air.Liveness.bpi - 1) { var buf: [Air.Liveness.bpi - 1]Air.Inst.Ref = @splat(.none); @@ -182269,15 +180743,6 @@ fn fail(cg: *CodeGen, comptime format: []const u8, args: anytype) error{ OutOfMe }; } -fn failMsg(cg: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } { - @branchHint(.cold); - const zcu = cg.pt.zcu; - return switch (cg.owner) { - .nav_index => |i| zcu.codegenFailMsg(i, msg), - .lazy_sym => |s| zcu.codegenFailTypeMsg(s.ty, msg), - }; -} - fn parseRegName(name: []const u8) ?Register { if (std.mem.startsWith(u8, name, "db")) return @enumFromInt( @intFromEnum(Register.dr0) + (std.fmt.parseInt(u4, name["db".len..], 0) catch return null), @@ -188819,7 +187284,6 @@ const Select = struct { const ptr_info = ty.ptrInfo(zcu); return switch (ptr_info.flags.vector_index) { .none => false, - .runtime => unreachable, else => ptr_info.child == .bool_type, }; }, @@ -188827,7 +187291,6 @@ const Select = struct { const ptr_info = ty.ptrInfo(zcu); return switch (ptr_info.flags.vector_index) { .none => false, - .runtime => unreachable, else => ptr_info.child == .bool_type and size.bitSize(cg.target) >= ptr_info.packed_offset.host_size, }; }, @@ -190814,7 +189277,7 @@ const Select = struct { .src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) * Select.Operand.Ref.src1.valueOf(s).immediate), .vector_index => switch (op.flags.base.ref.typeOf(s).ptrInfo(s.cg.pt.zcu).flags.vector_index) { - .none, .runtime => unreachable, + .none => unreachable, else => |vector_index| @intFromEnum(vector_index), }, .src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate), diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index e03517f97a2c..e8fea3c988c7 
100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -3158,11 +3158,7 @@ fn updateComptimeNavInner(dwarf: *Dwarf, pt: Zcu.PerThread, nav_index: InternPoo .struct_field else .struct_field); - if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else { - var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined; - const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable; - try wip_nav.strp(field_name); - } + try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip)); try wip_nav.refType(field_type); if (!is_comptime) { try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]); @@ -3187,7 +3183,7 @@ fn updateComptimeNavInner(dwarf: *Dwarf, pt: Zcu.PerThread, nav_index: InternPoo var field_bit_offset: u16 = 0; for (0..loaded_struct.field_types.len) |field_index| { try wip_nav.abbrevCode(.packed_struct_field); - try wip_nav.strp(loaded_struct.fieldName(ip, field_index).unwrap().?.toSlice(ip)); + try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip)); const field_type: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]); try wip_nav.refType(field_type); try diw.writeUleb128(field_bit_offset); @@ -4269,11 +4265,7 @@ fn updateLazyValue( .comptime_value_field_runtime_bits else continue); - if (loaded_struct_type.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else { - var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined; - const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable; - try wip_nav.strp(field_name); - } + try wip_nav.strp(loaded_struct_type.fieldName(ip, field_index).toSlice(ip)); const field_value: Value = .fromInterned(switch (aggregate.storage) { .bytes => unreachable, .elems => |elems| elems[field_index], @@ -4467,11 +4459,7 @@ fn updateContainerTypeWriterError( .struct_field else .struct_field); - if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else { - var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined; - const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable; - try wip_nav.strp(field_name); - } + try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip)); try wip_nav.refType(field_type); if (!is_comptime) { try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]); @@ -4573,11 +4561,7 @@ fn updateContainerTypeWriterError( .struct_field else .struct_field); - if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else { - var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined; - const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable; - try wip_nav.strp(field_name); - } + try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip)); try wip_nav.refType(field_type); if (!is_comptime) { try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]); @@ -4600,7 +4584,7 @@ fn updateContainerTypeWriterError( var field_bit_offset: u16 = 0; for (0..loaded_struct.field_types.len) |field_index| { try wip_nav.abbrevCode(.packed_struct_field); - try wip_nav.strp(loaded_struct.fieldName(ip, field_index).unwrap().?.toSlice(ip)); + try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip)); const field_type: Type = 
.fromInterned(loaded_struct.field_types.get(ip)[field_index]);
             try wip_nav.refType(field_type);
             try diw.writeUleb128(field_bit_offset);

From 20bd5e801826121ffa72ee54e3788bdb371b0fbc Mon Sep 17 00:00:00 2001
From: Matthew Lugg
Date: Fri, 31 Oct 2025 11:20:43 +0000
Subject: [PATCH 3/8] compiler-rt: remove dead code

`__addosi4`, `__addodi4`, `__addoti4`, `__subosi4`, `__subodi4`, and
`__suboti4` were all functions which we invented for no apparent reason.
Neither LLVM, nor GCC, nor the Zig compiler uses these functions. It appears
the functions were created due to a misunderstanding of an old language
proposal; see https://github.com/ziglang/zig/pull/10824.

There is no benefit to these functions existing; if a Zig compiler backend
needs this operation, it is trivial to implement, and *far* simpler than
calling a compiler-rt routine. Therefore, this commit deletes them. A small
amount of that code was used by other parts of compiler-rt; the logic is
trivial, so it has just been inlined where needed.

I also chose to quickly implement `__addvdi3` (a standard function) because
it is trivial and we already implement its `sub` counterpart, `__subvdi3`.
---
 CMakeLists.txt                   |  3 +-
 lib/compiler_rt.zig              |  5 +-
 lib/compiler_rt/addo.zig         | 46 -----------------
 lib/compiler_rt/addodi4_test.zig | 77 -----------------------------
 lib/compiler_rt/addosi4_test.zig | 78 -----------------------------
 lib/compiler_rt/addoti4_test.zig | 80 ------------------------------
 lib/compiler_rt/addvdi3.zig      | 26 ++++++++++
 lib/compiler_rt/addvsi3.zig      |  8 +--
 lib/compiler_rt/subo.zig         | 47 ------------------
 lib/compiler_rt/subodi4_test.zig | 81 ------------------------------
 lib/compiler_rt/subosi4_test.zig | 82 -------------------------------
 lib/compiler_rt/suboti4_test.zig | 84 --------------------------------
 lib/compiler_rt/subvdi3.zig      |  8 +--
 lib/compiler_rt/subvsi3.zig      |  8 +--
 14 files changed, 42 insertions(+), 591 deletions(-)
 delete mode 100644 lib/compiler_rt/addo.zig
 delete mode 100644 lib/compiler_rt/addodi4_test.zig
 delete mode 100644 lib/compiler_rt/addosi4_test.zig
 delete mode 100644 lib/compiler_rt/addoti4_test.zig
 create mode 100644 lib/compiler_rt/addvdi3.zig
 delete mode 100644 lib/compiler_rt/subo.zig
 delete mode 100644 lib/compiler_rt/subodi4_test.zig
 delete mode 100644 lib/compiler_rt/subosi4_test.zig
 delete mode 100644 lib/compiler_rt/suboti4_test.zig

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6bab57bd1fc8..7090f8852768 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -211,10 +211,10 @@ set(ZIG_STAGE2_SOURCES
     lib/compiler_rt/absvti2.zig
     lib/compiler_rt/adddf3.zig
     lib/compiler_rt/addf3.zig
-    lib/compiler_rt/addo.zig
     lib/compiler_rt/addsf3.zig
     lib/compiler_rt/addtf3.zig
     lib/compiler_rt/addvsi3.zig
+    lib/compiler_rt/addvdi3.zig
     lib/compiler_rt/addxf3.zig
     lib/compiler_rt/arm.zig
     lib/compiler_rt/atomics.zig
@@ -354,7 +354,6 @@ set(ZIG_STAGE2_SOURCES
     lib/compiler_rt/sqrt.zig
     lib/compiler_rt/stack_probe.zig
     lib/compiler_rt/subdf3.zig
-    lib/compiler_rt/subo.zig
     lib/compiler_rt/subsf3.zig
     lib/compiler_rt/subtf3.zig
     lib/compiler_rt/subvdi3.zig

diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig
index aac81bf414f4..040d2c6c411f 100644
--- a/lib/compiler_rt.zig
+++ b/lib/compiler_rt.zig
@@ -28,12 +28,13 @@ comptime {
     _ = @import("compiler_rt/negv.zig");
     _ = @import("compiler_rt/addvsi3.zig");
+    _ = @import("compiler_rt/addvdi3.zig");
+
     _ = @import("compiler_rt/subvsi3.zig");
     _ = @import("compiler_rt/subvdi3.zig");
+
     _ = @import("compiler_rt/mulvsi3.zig");

-    _ = @import("compiler_rt/addo.zig");
-    _ =
@import("compiler_rt/subo.zig"); _ = @import("compiler_rt/mulo.zig"); // Float routines diff --git a/lib/compiler_rt/addo.zig b/lib/compiler_rt/addo.zig deleted file mode 100644 index 610d6206904b..000000000000 --- a/lib/compiler_rt/addo.zig +++ /dev/null @@ -1,46 +0,0 @@ -const std = @import("std"); -const common = @import("./common.zig"); -pub const panic = @import("common.zig").panic; - -comptime { - @export(&__addosi4, .{ .name = "__addosi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__addodi4, .{ .name = "__addodi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__addoti4, .{ .name = "__addoti4", .linkage = common.linkage, .visibility = common.visibility }); -} - -// addo - add overflow -// * return a+%b. -// * return if a+b overflows => 1 else => 0 -// - addoXi4_generic as default - -inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { - @setRuntimeSafety(common.test_safety); - overflow.* = 0; - const sum: ST = a +% b; - // Hackers Delight: section Overflow Detection, subsection Signed Add/Subtract - // Let sum = a +% b == a + b + carry == wraparound addition. - // Overflow in a+b+carry occurs, iff a and b have opposite signs - // and the sign of a+b+carry is the same as a (or equivalently b). - // Slower routine: res = ~(a ^ b) & ((sum ^ a) - // Faster routine: res = (sum ^ a) & (sum ^ b) - // Overflow occurred, iff (res < 0) - if (((sum ^ a) & (sum ^ b)) < 0) - overflow.* = 1; - return sum; -} - -pub fn __addosi4(a: i32, b: i32, overflow: *c_int) callconv(.c) i32 { - return addoXi4_generic(i32, a, b, overflow); -} -pub fn __addodi4(a: i64, b: i64, overflow: *c_int) callconv(.c) i64 { - return addoXi4_generic(i64, a, b, overflow); -} -pub fn __addoti4(a: i128, b: i128, overflow: *c_int) callconv(.c) i128 { - return addoXi4_generic(i128, a, b, overflow); -} - -test { - _ = @import("addosi4_test.zig"); - _ = @import("addodi4_test.zig"); - _ = @import("addoti4_test.zig"); -} diff --git a/lib/compiler_rt/addodi4_test.zig b/lib/compiler_rt/addodi4_test.zig deleted file mode 100644 index 92f8e9c1f26f..000000000000 --- a/lib/compiler_rt/addodi4_test.zig +++ /dev/null @@ -1,77 +0,0 @@ -const addv = @import("addo.zig"); -const std = @import("std"); -const testing = std.testing; -const math = std.math; - -fn test__addodi4(a: i64, b: i64) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = addv.__addodi4(a, b, &result_ov); - const expected: i64 = simple_addodi4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -fn simple_addodi4(a: i64, b: i64, overflow: *c_int) i64 { - overflow.* = 0; - const min: i64 = math.minInt(i64); - const max: i64 = math.maxInt(i64); - if (((a > 0) and (b > max - a)) or - ((a < 0) and (b < min - a))) - overflow.* = 1; - return a +% b; -} - -test "addodi4" { - const min: i64 = math.minInt(i64); - const max: i64 = math.maxInt(i64); - var i: i64 = 1; - while (i < max) : (i *|= 2) { - try test__addodi4(i, i); - try test__addodi4(-i, -i); - try test__addodi4(i, -i); - try test__addodi4(-i, i); - } - - // edge cases - // 0 + 0 = 0 - // MIN + MIN overflow - // MAX + MAX overflow - // 0 + MIN MIN - // 0 + MAX MAX - // MIN + 0 MIN - // MAX + 0 MAX - // MIN + MAX -1 - // MAX + MIN -1 - try test__addodi4(0, 0); - try test__addodi4(min, min); - try test__addodi4(max, max); - try test__addodi4(0, min); - try test__addodi4(0, max); - try test__addodi4(min, 0); - try 
test__addodi4(max, 0); - try test__addodi4(min, max); - try test__addodi4(max, min); - - // derived edge cases - // MIN+1 + MIN overflow - // MAX-1 + MAX overflow - // 1 + MIN = MIN+1 - // -1 + MIN overflow - // -1 + MAX = MAX-1 - // +1 + MAX overflow - // MIN + 1 = MIN+1 - // MIN + -1 overflow - // MAX + 1 overflow - // MAX + -1 = MAX-1 - try test__addodi4(min + 1, min); - try test__addodi4(max - 1, max); - try test__addodi4(1, min); - try test__addodi4(-1, min); - try test__addodi4(-1, max); - try test__addodi4(1, max); - try test__addodi4(min, 1); - try test__addodi4(min, -1); - try test__addodi4(max, -1); - try test__addodi4(max, 1); -} diff --git a/lib/compiler_rt/addosi4_test.zig b/lib/compiler_rt/addosi4_test.zig deleted file mode 100644 index 3494909f50a6..000000000000 --- a/lib/compiler_rt/addosi4_test.zig +++ /dev/null @@ -1,78 +0,0 @@ -const addv = @import("addo.zig"); -const testing = @import("std").testing; - -fn test__addosi4(a: i32, b: i32) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = addv.__addosi4(a, b, &result_ov); - const expected: i32 = simple_addosi4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -fn simple_addosi4(a: i32, b: i32, overflow: *c_int) i32 { - overflow.* = 0; - const min: i32 = -2147483648; - const max: i32 = 2147483647; - if (((a > 0) and (b > max - a)) or - ((a < 0) and (b < min - a))) - overflow.* = 1; - return a +% b; -} - -test "addosi4" { - // -2^31 <= i32 <= 2^31-1 - // 2^31 = 2147483648 - // 2^31-1 = 2147483647 - const min: i32 = -2147483648; - const max: i32 = 2147483647; - var i: i32 = 1; - while (i < max) : (i *|= 2) { - try test__addosi4(i, i); - try test__addosi4(-i, -i); - try test__addosi4(i, -i); - try test__addosi4(-i, i); - } - - // edge cases - // 0 + 0 = 0 - // MIN + MIN overflow - // MAX + MAX overflow - // 0 + MIN MIN - // 0 + MAX MAX - // MIN + 0 MIN - // MAX + 0 MAX - // MIN + MAX -1 - // MAX + MIN -1 - try test__addosi4(0, 0); - try test__addosi4(min, min); - try test__addosi4(max, max); - try test__addosi4(0, min); - try test__addosi4(0, max); - try test__addosi4(min, 0); - try test__addosi4(max, 0); - try test__addosi4(min, max); - try test__addosi4(max, min); - - // derived edge cases - // MIN+1 + MIN overflow - // MAX-1 + MAX overflow - // 1 + MIN = MIN+1 - // -1 + MIN overflow - // -1 + MAX = MAX-1 - // +1 + MAX overflow - // MIN + 1 = MIN+1 - // MIN + -1 overflow - // MAX + 1 overflow - // MAX + -1 = MAX-1 - try test__addosi4(min + 1, min); - try test__addosi4(max - 1, max); - try test__addosi4(1, min); - try test__addosi4(-1, min); - try test__addosi4(-1, max); - try test__addosi4(1, max); - try test__addosi4(min, 1); - try test__addosi4(min, -1); - try test__addosi4(max, -1); - try test__addosi4(max, 1); -} diff --git a/lib/compiler_rt/addoti4_test.zig b/lib/compiler_rt/addoti4_test.zig deleted file mode 100644 index d031d1d428a2..000000000000 --- a/lib/compiler_rt/addoti4_test.zig +++ /dev/null @@ -1,80 +0,0 @@ -const addv = @import("addo.zig"); -const builtin = @import("builtin"); -const std = @import("std"); -const testing = std.testing; -const math = std.math; - -fn test__addoti4(a: i128, b: i128) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = addv.__addoti4(a, b, &result_ov); - const expected: i128 = simple_addoti4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -fn 
simple_addoti4(a: i128, b: i128, overflow: *c_int) i128 { - overflow.* = 0; - const min: i128 = math.minInt(i128); - const max: i128 = math.maxInt(i128); - if (((a > 0) and (b > max - a)) or - ((a < 0) and (b < min - a))) - overflow.* = 1; - return a +% b; -} - -test "addoti4" { - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - - const min: i128 = math.minInt(i128); - const max: i128 = math.maxInt(i128); - var i: i128 = 1; - while (i < max) : (i *|= 2) { - try test__addoti4(i, i); - try test__addoti4(-i, -i); - try test__addoti4(i, -i); - try test__addoti4(-i, i); - } - - // edge cases - // 0 + 0 = 0 - // MIN + MIN overflow - // MAX + MAX overflow - // 0 + MIN MIN - // 0 + MAX MAX - // MIN + 0 MIN - // MAX + 0 MAX - // MIN + MAX -1 - // MAX + MIN -1 - try test__addoti4(0, 0); - try test__addoti4(min, min); - try test__addoti4(max, max); - try test__addoti4(0, min); - try test__addoti4(0, max); - try test__addoti4(min, 0); - try test__addoti4(max, 0); - try test__addoti4(min, max); - try test__addoti4(max, min); - - // derived edge cases - // MIN+1 + MIN overflow - // MAX-1 + MAX overflow - // 1 + MIN = MIN+1 - // -1 + MIN overflow - // -1 + MAX = MAX-1 - // +1 + MAX overflow - // MIN + 1 = MIN+1 - // MIN + -1 overflow - // MAX + 1 overflow - // MAX + -1 = MAX-1 - try test__addoti4(min + 1, min); - try test__addoti4(max - 1, max); - try test__addoti4(1, min); - try test__addoti4(-1, min); - try test__addoti4(-1, max); - try test__addoti4(1, max); - try test__addoti4(min, 1); - try test__addoti4(min, -1); - try test__addoti4(max, -1); - try test__addoti4(max, 1); -} diff --git a/lib/compiler_rt/addvdi3.zig b/lib/compiler_rt/addvdi3.zig new file mode 100644 index 000000000000..03aa9b91c717 --- /dev/null +++ b/lib/compiler_rt/addvdi3.zig @@ -0,0 +1,26 @@ +const common = @import("./common.zig"); +const testing = @import("std").testing; + +pub const panic = common.panic; + +comptime { + @export(&__addvdi3, .{ .name = "__addvdi3", .linkage = common.linkage, .visibility = common.visibility }); +} + +pub fn __addvdi3(a: i64, b: i64) callconv(.c) i64 { + const sum = a +% b; + // Overflow occurred iff both operands have the same sign, and the sign of the sum does + // not match it. In other words, iff the sum sign is not the sign of either operand. + if (((sum ^ a) & (sum ^ b)) < 0) @panic("compiler-rt: integer overflow"); + return sum; +} + +test "addvdi3" { + // const min: i64 = -9223372036854775808 + // const max: i64 = 9223372036854775807 + // TODO write panic handler for testing panics + // try test__addvdi3(-9223372036854775808, -1, -1); // panic + // try test__addvdi3(9223372036854775807, 1, 1); // panic + try testing.expectEqual(-9223372036854775808, __addvdi3(-9223372036854775807, -1)); + try testing.expectEqual(9223372036854775807, __addvdi3(9223372036854775806, 1)); +} diff --git a/lib/compiler_rt/addvsi3.zig b/lib/compiler_rt/addvsi3.zig index 04c19881bc01..e688fdba5844 100644 --- a/lib/compiler_rt/addvsi3.zig +++ b/lib/compiler_rt/addvsi3.zig @@ -1,4 +1,3 @@ -const addv = @import("addo.zig"); const common = @import("./common.zig"); const testing = @import("std").testing; @@ -9,9 +8,10 @@ comptime { } pub fn __addvsi3(a: i32, b: i32) callconv(.c) i32 { - var overflow: c_int = 0; - const sum = addv.__addosi4(a, b, &overflow); - if (overflow != 0) @panic("compiler-rt: integer overflow"); + const sum = a +% b; + // Overflow occurred iff both operands have the same sign, and the sign of the sum does + // not match it. 
In other words, iff the sum sign is not the sign of either operand. + if (((sum ^ a) & (sum ^ b)) < 0) @panic("compiler-rt: integer overflow"); return sum; } diff --git a/lib/compiler_rt/subo.zig b/lib/compiler_rt/subo.zig deleted file mode 100644 index b4fb8f77106a..000000000000 --- a/lib/compiler_rt/subo.zig +++ /dev/null @@ -1,47 +0,0 @@ -//! subo - subtract overflow -//! * return a-%b. -//! * return if a-b overflows => 1 else => 0 -//! - suboXi4_generic as default - -const std = @import("std"); -const builtin = @import("builtin"); -const common = @import("common.zig"); - -pub const panic = common.panic; - -comptime { - @export(&__subosi4, .{ .name = "__subosi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__subodi4, .{ .name = "__subodi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__suboti4, .{ .name = "__suboti4", .linkage = common.linkage, .visibility = common.visibility }); -} - -pub fn __subosi4(a: i32, b: i32, overflow: *c_int) callconv(.c) i32 { - return suboXi4_generic(i32, a, b, overflow); -} -pub fn __subodi4(a: i64, b: i64, overflow: *c_int) callconv(.c) i64 { - return suboXi4_generic(i64, a, b, overflow); -} -pub fn __suboti4(a: i128, b: i128, overflow: *c_int) callconv(.c) i128 { - return suboXi4_generic(i128, a, b, overflow); -} - -inline fn suboXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { - overflow.* = 0; - const sum: ST = a -% b; - // Hackers Delight: section Overflow Detection, subsection Signed Add/Subtract - // Let sum = a -% b == a - b - carry == wraparound subtraction. - // Overflow in a-b-carry occurs, iff a and b have opposite signs - // and the sign of a-b-carry is opposite of a (or equivalently same as b). - // Faster routine: res = (a ^ b) & (sum ^ a) - // Slower routine: res = (sum^a) & ~(sum^b) - // Overflow occurred, iff (res < 0) - if (((a ^ b) & (sum ^ a)) < 0) - overflow.* = 1; - return sum; -} - -test { - _ = @import("subosi4_test.zig"); - _ = @import("subodi4_test.zig"); - _ = @import("suboti4_test.zig"); -} diff --git a/lib/compiler_rt/subodi4_test.zig b/lib/compiler_rt/subodi4_test.zig deleted file mode 100644 index 2dd717e14b4b..000000000000 --- a/lib/compiler_rt/subodi4_test.zig +++ /dev/null @@ -1,81 +0,0 @@ -const subo = @import("subo.zig"); -const std = @import("std"); -const testing = std.testing; -const math = std.math; - -fn test__subodi4(a: i64, b: i64) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = subo.__subodi4(a, b, &result_ov); - const expected: i64 = simple_subodi4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -// 2 cases on evaluating `a-b`: -// 1. 
`a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b
-// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b
-// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware
-pub fn simple_subodi4(a: i64, b: i64, overflow: *c_int) i64 {
-    overflow.* = 0;
-    const min: i64 = math.minInt(i64);
-    const max: i64 = math.maxInt(i64);
-    if (((b > 0) and (a < min + b)) or
-        ((b < 0) and (a > max + b)))
-        overflow.* = 1;
-    return a -% b;
-}
-
-test "subodi3" {
-    const min: i64 = math.minInt(i64);
-    const max: i64 = math.maxInt(i64);
-    var i: i64 = 1;
-    while (i < max) : (i *|= 2) {
-        try test__subodi4(i, i);
-        try test__subodi4(-i, -i);
-        try test__subodi4(i, -i);
-        try test__subodi4(-i, i);
-    }
-
-    // edge cases
-    // 0 - 0 = 0
-    // MIN - MIN = 0
-    // MAX - MAX = 0
-    // 0 - MIN overflow
-    // 0 - MAX = MIN+1
-    // MIN - 0 = MIN
-    // MAX - 0 = MAX
-    // MIN - MAX overflow
-    // MAX - MIN overflow
-    try test__subodi4(0, 0);
-    try test__subodi4(min, min);
-    try test__subodi4(max, max);
-    try test__subodi4(0, min);
-    try test__subodi4(0, max);
-    try test__subodi4(min, 0);
-    try test__subodi4(max, 0);
-    try test__subodi4(min, max);
-    try test__subodi4(max, min);
-
-    // derived edge cases
-    // MIN+1 - MIN = 1
-    // MAX-1 - MAX = -1
-    // 1 - MIN overflow
-    // -1 - MIN = MAX
-    // -1 - MAX = MIN
-    // +1 - MAX = MIN+2
-    // MIN - 1 overflow
-    // MIN - -1 = MIN+1
-    // MAX - 1 = MAX-1
-    // MAX - -1 overflow
-    try test__subodi4(min + 1, min);
-    try test__subodi4(max - 1, max);
-    try test__subodi4(1, min);
-    try test__subodi4(-1, min);
-    try test__subodi4(-1, max);
-    try test__subodi4(1, max);
-    try test__subodi4(min, 1);
-    try test__subodi4(min, -1);
-    try test__subodi4(max, -1);
-    try test__subodi4(max, 1);
-}

diff --git a/lib/compiler_rt/subosi4_test.zig b/lib/compiler_rt/subosi4_test.zig
deleted file mode 100644
index 8644e8100ef6..000000000000
--- a/lib/compiler_rt/subosi4_test.zig
+++ /dev/null
@@ -1,82 +0,0 @@
-const subo = @import("subo.zig");
-const testing = @import("std").testing;
-
-fn test__subosi4(a: i32, b: i32) !void {
-    var result_ov: c_int = undefined;
-    var expected_ov: c_int = undefined;
-    const result = subo.__subosi4(a, b, &result_ov);
-    const expected: i32 = simple_subosi4(a, b, &expected_ov);
-    try testing.expectEqual(expected, result);
-    try testing.expectEqual(expected_ov, result_ov);
-}
-
-// 2 cases on evaluating `a-b`:
-// 1.
`a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b
-// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b
-// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware
-pub fn simple_subosi4(a: i32, b: i32, overflow: *c_int) i32 {
-    overflow.* = 0;
-    const min: i32 = -2147483648;
-    const max: i32 = 2147483647;
-    if (((b > 0) and (a < min + b)) or
-        ((b < 0) and (a > max + b)))
-        overflow.* = 1;
-    return a -% b;
-}
-
-test "subosi3" {
-    // -2^31 <= i32 <= 2^31-1
-    // 2^31 = 2147483648
-    // 2^31-1 = 2147483647
-    const min: i32 = -2147483648;
-    const max: i32 = 2147483647;
-    var i: i32 = 1;
-    while (i < max) : (i *|= 2) {
-        try test__subosi4(i, i);
-        try test__subosi4(-i, -i);
-        try test__subosi4(i, -i);
-        try test__subosi4(-i, i);
-    }
-
-    // edge cases
-    // 0 - 0 = 0
-    // MIN - MIN = 0
-    // MAX - MAX = 0
-    // 0 - MIN overflow
-    // 0 - MAX = MIN+1
-    // MIN - 0 = MIN
-    // MAX - 0 = MAX
-    // MIN - MAX overflow
-    // MAX - MIN overflow
-    try test__subosi4(0, 0);
-    try test__subosi4(min, min);
-    try test__subosi4(max, max);
-    try test__subosi4(0, min);
-    try test__subosi4(0, max);
-    try test__subosi4(min, 0);
-    try test__subosi4(max, 0);
-    try test__subosi4(min, max);
-    try test__subosi4(max, min);
-
-    // derived edge cases
-    // MIN+1 - MIN = 1
-    // MAX-1 - MAX = -1
-    // 1 - MIN overflow
-    // -1 - MIN = MAX
-    // -1 - MAX = MIN
-    // +1 - MAX = MIN+2
-    // MIN - 1 overflow
-    // MIN - -1 = MIN+1
-    // MAX - 1 = MAX-1
-    // MAX - -1 overflow
-    try test__subosi4(min + 1, min);
-    try test__subosi4(max - 1, max);
-    try test__subosi4(1, min);
-    try test__subosi4(-1, min);
-    try test__subosi4(-1, max);
-    try test__subosi4(1, max);
-    try test__subosi4(min, 1);
-    try test__subosi4(min, -1);
-    try test__subosi4(max, -1);
-    try test__subosi4(max, 1);
-}

diff --git a/lib/compiler_rt/suboti4_test.zig b/lib/compiler_rt/suboti4_test.zig
deleted file mode 100644
index 65018bc966e5..000000000000
--- a/lib/compiler_rt/suboti4_test.zig
+++ /dev/null
@@ -1,84 +0,0 @@
-const subo = @import("subo.zig");
-const builtin = @import("builtin");
-const std = @import("std");
-const testing = std.testing;
-const math = std.math;
-
-fn test__suboti4(a: i128, b: i128) !void {
-    var result_ov: c_int = undefined;
-    var expected_ov: c_int = undefined;
-    const result = subo.__suboti4(a, b, &result_ov);
-    const expected: i128 = simple_suboti4(a, b, &expected_ov);
-    try testing.expectEqual(expected, result);
-    try testing.expectEqual(expected_ov, result_ov);
-}
-
-// 2 cases on evaluating `a-b`:
-// 1.
`a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b
-// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b
-// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware
-pub fn simple_suboti4(a: i128, b: i128, overflow: *c_int) i128 {
-    overflow.* = 0;
-    const min: i128 = math.minInt(i128);
-    const max: i128 = math.maxInt(i128);
-    if (((b > 0) and (a < min + b)) or
-        ((b < 0) and (a > max + b)))
-        overflow.* = 1;
-    return a -% b;
-}
-
-test "suboti3" {
-    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
-
-    const min: i128 = math.minInt(i128);
-    const max: i128 = math.maxInt(i128);
-    var i: i128 = 1;
-    while (i < max) : (i *|= 2) {
-        try test__suboti4(i, i);
-        try test__suboti4(-i, -i);
-        try test__suboti4(i, -i);
-        try test__suboti4(-i, i);
-    }
-
-    // edge cases
-    // 0 - 0 = 0
-    // MIN - MIN = 0
-    // MAX - MAX = 0
-    // 0 - MIN overflow
-    // 0 - MAX = MIN+1
-    // MIN - 0 = MIN
-    // MAX - 0 = MAX
-    // MIN - MAX overflow
-    // MAX - MIN overflow
-    try test__suboti4(0, 0);
-    try test__suboti4(min, min);
-    try test__suboti4(max, max);
-    try test__suboti4(0, min);
-    try test__suboti4(0, max);
-    try test__suboti4(min, 0);
-    try test__suboti4(max, 0);
-    try test__suboti4(min, max);
-    try test__suboti4(max, min);
-
-    // derived edge cases
-    // MIN+1 - MIN = 1
-    // MAX-1 - MAX = -1
-    // 1 - MIN overflow
-    // -1 - MIN = MAX
-    // -1 - MAX = MIN
-    // +1 - MAX = MIN+2
-    // MIN - 1 overflow
-    // MIN - -1 = MIN+1
-    // MAX - 1 = MAX-1
-    // MAX - -1 overflow
-    try test__suboti4(min + 1, min);
-    try test__suboti4(max - 1, max);
-    try test__suboti4(1, min);
-    try test__suboti4(-1, min);
-    try test__suboti4(-1, max);
-    try test__suboti4(1, max);
-    try test__suboti4(min, 1);
-    try test__suboti4(min, -1);
-    try test__suboti4(max, -1);
-    try test__suboti4(max, 1);
-}

diff --git a/lib/compiler_rt/subvdi3.zig b/lib/compiler_rt/subvdi3.zig
index 8248e930222d..a34deb2da1bf 100644
--- a/lib/compiler_rt/subvdi3.zig
+++ b/lib/compiler_rt/subvdi3.zig
@@ -1,4 +1,3 @@
-const subv = @import("subo.zig");
 const common = @import("./common.zig");
 const testing = @import("std").testing;

@@ -9,9 +8,10 @@ comptime {
 }

 pub fn __subvdi3(a: i64, b: i64) callconv(.c) i64 {
-    var overflow: c_int = 0;
-    const sum = subv.__subodi4(a, b, &overflow);
-    if (overflow != 0) @panic("compiler-rt: integer overflow");
+    const sum = a -% b;
+    // Overflow occurred iff the operands have opposite signs, and the sign of the
+    // sum is the opposite of the lhs sign.
+    if (((a ^ b) & (sum ^ a)) < 0) @panic("compiler-rt: integer overflow");
     return sum;
 }

diff --git a/lib/compiler_rt/subvsi3.zig b/lib/compiler_rt/subvsi3.zig
index 8a2ea6c6a612..c524a3a63499 100644
--- a/lib/compiler_rt/subvsi3.zig
+++ b/lib/compiler_rt/subvsi3.zig
@@ -1,4 +1,3 @@
-const subv = @import("subo.zig");
 const common = @import("./common.zig");
 const testing = @import("std").testing;

@@ -9,9 +8,10 @@ comptime {
 }

 pub fn __subvsi3(a: i32, b: i32) callconv(.c) i32 {
-    var overflow: c_int = 0;
-    const sum = subv.__subosi4(a, b, &overflow);
-    if (overflow != 0) @panic("compiler-rt: integer overflow");
+    const sum = a -% b;
+    // Overflow occurred iff the operands have opposite signs, and the sign of the
+    // sum is the opposite of the lhs sign.
+ if (((a ^ b) & (sum ^ a)) < 0) @panic("compiler-rt: integer overflow"); return sum; } From 6576c3b898fb57a588c4344211c06b079a82ee91 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Fri, 31 Oct 2025 13:01:42 +0000 Subject: [PATCH 4/8] x86_64: spill eflags when initializing bool vector --- src/codegen/x86_64/CodeGen.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig index 8e54a3a8c0e4..94394185bd58 100644 --- a/src/codegen/x86_64/CodeGen.zig +++ b/src/codegen/x86_64/CodeGen.zig @@ -180028,6 +180028,8 @@ fn airAggregateInitBoolVec(self: *CodeGen, inst: Air.Inst.Index) !void { { const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); + + try self.spillEflagsIfOccupied(); try self.asmRegisterRegister( .{ ._, .xor }, registerAlias(dst_reg, @min(result_size, 4)), From 99a7884308d288bd39df9192c9094439b179ff60 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Fri, 31 Oct 2025 13:50:29 +0000 Subject: [PATCH 5/8] behavior: disable test on cbe This isn't so much a regression as it is foreshadowing of accepted proposal https://github.com/ziglang/zig/issues/24657. --- test/behavior/union.zig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 27663feeb670..11356c09b76c 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -218,10 +218,13 @@ test "union with specified enum tag" { } test "packed union generates correctly aligned type" { + // This test will be removed after the following accepted proposal is implemented: + // https://github.com/ziglang/zig/issues/24657 if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; const U = packed union { f1: *const fn () error{TestUnexpectedResult}!void, From 69f39868b4125e79e4070a88bbdfcd3643dbc90d Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Sun, 9 Nov 2025 15:16:49 +0000 Subject: [PATCH 6/8] Air.Legalize: revert to loops for scalarizations I had tried unrolling the loops to avoid requiring the `vector_store_elem` instruction, but it's arguably a problem to generate O(N) code for an operation on `@Vector(N, T)`. In addition, that lowering emitted a lot of `.aggregate_init` instructions, which is itself a quite difficult operation to codegen. This requires reintroducing runtime vector indexing internally. However, I've put it in a couple of instructions which are intended only for use by `Air.Legalize`, named `legalize_vec_elem_val` (like `array_elem_val`, but for indexing a vector with a runtime-known index) and `legalize_vec_store_elem` (like the old `vector_store_elem` instruction). These are explicitly documented as *not* being emitted by Sema, so need only be implemented by backends if they actually use an `Air.Legalize.Feature` which emits them (otherwise they can be marked as `unreachable`). 
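As a rough illustration (a sketch, not the exact lowering): the loop that
Legalize now emits for, say, an elementwise `a + b` on `@Vector(n, T)`
corresponds to source code of the following shape. `vecElemVal` and
`vecStoreElem` are hypothetical stand-ins for the two new instructions,
modeled here with vector<->array coercions so the sketch compiles as
ordinary Zig; in the emitted AIR each one is a single instruction.

  const std = @import("std");

  fn vecElemVal(comptime n: usize, comptime T: type, v: @Vector(n, T), i: usize) T {
      // Stand-in for `legalize_vec_elem_val`: read a vector element at a runtime index.
      const arr: [n]T = v;
      return arr[i];
  }

  fn vecStoreElem(comptime n: usize, comptime T: type, p: *@Vector(n, T), i: usize, elem: T) void {
      // Stand-in for `legalize_vec_store_elem`: store one element through a vector pointer.
      var arr: [n]T = p.*;
      arr[i] = elem;
      p.* = arr;
  }

  fn scalarizedAdd(comptime n: usize, comptime T: type, a: @Vector(n, T), b: @Vector(n, T)) @Vector(n, T) {
      var result: @Vector(n, T) = undefined;
      var i: usize = 0;
      while (true) {
          const elem = vecElemVal(n, T, a, i) + vecElemVal(n, T, b, i);
          vecStoreElem(n, T, &result, i, elem);
          if (i == n - 1) return result; // compare against n-1, then branch out of the block
          i += 1; // otherwise bump the index and repeat the loop body
      }
  }

  test "scalarizedAdd matches vector addition" {
      const a: @Vector(4, u32) = .{ 1, 2, 3, 4 };
      const b: @Vector(4, u32) = .{ 10, 20, 30, 40 };
      try std.testing.expectEqual(a + b, scalarizedAdd(4, u32, a, b));
  }

Note that the amount of emitted code is independent of `n`, which is the
point of reverting to a loop.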
---
 src/Air.zig                     |   29 +-
 src/Air/Legalize.zig            | 1106 +++++++++++++++++++++++--------
 src/Air/Liveness.zig            |    7 +
 src/Air/Liveness/Verify.zig     |    6 +
 src/Air/print.zig               |   14 +
 src/Air/types_resolved.zig      |    2 +
 src/Sema.zig                    |   23 +-
 src/codegen/aarch64/Select.zig  |    9 +
 src/codegen/c.zig               |    4 +
 src/codegen/llvm.zig            |    5 +
 src/codegen/riscv64/CodeGen.zig |    5 +
 src/codegen/sparc64/CodeGen.zig |    5 +
 src/codegen/wasm/CodeGen.zig    |    4 +
 src/codegen/x86_64/CodeGen.zig  |  630 +++++++++++++++++-
 14 files changed, 1548 insertions(+), 301 deletions(-)

diff --git a/src/Air.zig b/src/Air.zig
index db5307f4591a..722ea28305ac 100644
--- a/src/Air.zig
+++ b/src/Air.zig
@@ -660,8 +660,8 @@ pub const Inst = struct {
         /// Given a pointer to a slice, return a pointer to the pointer of the slice.
         /// Uses the `ty_op` field.
         ptr_slice_ptr_ptr,
-        /// Given an (array value or vector value) and element index,
-        /// return the element value at that index.
+        /// Given an (array value or vector value) and element index, return the element value at
+        /// that index. If the lhs is a vector value, the index is guaranteed to be comptime-known.
         /// Result type is the element type of the array operand.
         /// Uses the `bin_op` field.
         array_elem_val,
@@ -915,6 +915,26 @@ pub const Inst = struct {
         /// Operand is unused and set to Ref.none
         work_group_id,

+        // The remaining instructions are not emitted by Sema. They are only emitted by `Legalize`,
+        // depending on the enabled features. As such, backends can consider them `unreachable` if
+        // they do not enable the relevant legalizations.
+
+        /// Given a pointer to a vector, a runtime-known index, and a scalar value, store the value
+        /// into the vector at the given index. Zig does not support this operation, but `Legalize`
+        /// may emit it when scalarizing vector operations.
+        ///
+        /// Uses the `pl_op` field with payload `Bin`. `operand` is the vector pointer. `lhs` is the
+        /// element index of type `usize`. `rhs` is the element value. Result is always void.
+        legalize_vec_store_elem,
+        /// Given a vector value and a runtime-known index, return the element value at that index.
+        /// This instruction is similar to `array_elem_val`; the only difference is that the index
+        /// here is runtime-known, which is usually not allowed for vectors. `Legalize` may emit
+        /// this instruction when scalarizing vector operations.
+        ///
+        /// Uses the `bin_op` field. `lhs` is the vector value. `rhs` is the element index. Result
+ legalize_vec_elem_val, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1681,6 +1701,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .prefetch, .set_err_return_trace, .c_va_end, + .legalize_vec_store_elem, => return .void, .slice_len, @@ -1699,7 +1720,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) return .fromInterned(ip.funcTypeReturnType(callee_ty.toIntern())); }, - .slice_elem_val, .ptr_elem_val, .array_elem_val => { + .slice_elem_val, .ptr_elem_val, .array_elem_val, .legalize_vec_elem_val => { const ptr_ty = air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip); return ptr_ty.childTypeIp(ip); }, @@ -1857,6 +1878,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .intcast_safe, .int_from_float_safe, .int_from_float_optimized_safe, + .legalize_vec_store_elem, => true, .add, @@ -2002,6 +2024,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .work_item_id, .work_group_size, .work_group_id, + .legalize_vec_elem_val, => false, .is_non_null_ptr, .is_null_ptr, .is_non_err_ptr, .is_err_ptr => air.typeOf(data.un_op, ip).isVolatilePtrIp(ip), diff --git a/src/Air/Legalize.zig b/src/Air/Legalize.zig index a26ed8996473..1d935bd360df 100644 --- a/src/Air/Legalize.zig +++ b/src/Air/Legalize.zig @@ -320,28 +320,36 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .xor, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .add_safe => if (l.features.has(.expand_add_safe)) { assert(!l.features.has(.scalarize_add_safe)); // it doesn't make sense to do both continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .add_with_overflow)); } else if (l.features.has(.scalarize_add_safe)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .sub_safe => if (l.features.has(.expand_sub_safe)) { assert(!l.features.has(.scalarize_sub_safe)); // it doesn't make sense to do both continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .sub_with_overflow)); } else if (l.features.has(.scalarize_sub_safe)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .mul_safe => if (l.features.has(.expand_mul_safe)) { assert(!l.features.has(.scalarize_mul_safe)); // it doesn't make sense to do both continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .mul_with_overflow)); } else if (l.features.has(.scalarize_mul_safe)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try 
l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .ptr_add, .ptr_sub => {}, inline .add_with_overflow, @@ -350,7 +358,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .shl_with_overflow, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; - if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst)); + if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst)); + } }, .alloc => {}, .inferred_alloc, .inferred_alloc_comptime => unreachable, @@ -387,7 +397,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { } } } - if (l.features.has(comptime .scalarize(air_tag))) continue :inst try l.scalarize(inst, .bin_op); + if (l.features.has(comptime .scalarize(air_tag))) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } } }, inline .not, @@ -406,7 +418,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .float_from_int, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .bitcast => if (l.features.has(.scalarize_bitcast)) { if (try l.scalarizeBitcastBlockPayload(inst)) |payload| { @@ -418,21 +432,27 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { continue :inst l.replaceInst(inst, .block, try l.safeIntcastBlockPayload(inst)); } else if (l.features.has(.scalarize_intcast_safe)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .int_from_float_safe => if (l.features.has(.expand_int_from_float_safe)) { assert(!l.features.has(.scalarize_int_from_float_safe)); continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, false)); } else if (l.features.has(.scalarize_int_from_float_safe)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .int_from_float_optimized_safe => if (l.features.has(.expand_int_from_float_optimized_safe)) { assert(!l.features.has(.scalarize_int_from_float_optimized_safe)); continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, true)); } else if (l.features.has(.scalarize_int_from_float_optimized_safe)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .block, .loop 
=> { const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -467,7 +487,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .neg_optimized, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const un_op = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op; - if (l.typeOf(un_op).isVector(zcu)) continue :inst try l.scalarize(inst, .un_op); + if (l.typeOf(un_op).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op)); + } }, .cmp_lt, .cmp_lt_optimized, @@ -484,7 +506,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { => {}, inline .cmp_vector, .cmp_vector_optimized => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; - if (ty_pl.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .cmp_vector); + if (ty_pl.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .cmp_vector)); + } }, .cond_br => { const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; @@ -614,9 +638,15 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { else => {}, } }, - .shuffle_one => if (l.features.has(.scalarize_shuffle_one)) continue :inst try l.scalarize(inst, .shuffle_one), - .shuffle_two => if (l.features.has(.scalarize_shuffle_two)) continue :inst try l.scalarize(inst, .shuffle_two), - .select => if (l.features.has(.scalarize_select)) continue :inst try l.scalarize(inst, .select), + .shuffle_one => if (l.features.has(.scalarize_shuffle_one)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleOneBlockPayload(inst)); + }, + .shuffle_two => if (l.features.has(.scalarize_shuffle_two)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleTwoBlockPayload(inst)); + }, + .select => if (l.features.has(.scalarize_select)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .select)); + }, .memset, .memset_safe, .memcpy, @@ -657,7 +687,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .union_init, .prefetch => {}, .mul_add => if (l.features.has(.scalarize_mul_add)) { const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; - if (l.typeOf(pl_op.operand).isVector(zcu)) continue :inst try l.scalarize(inst, .pl_op_bin); + if (l.typeOf(pl_op.operand).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .pl_op_bin)); + } }, .field_parent_ptr, .wasm_memory_size, @@ -675,96 +707,123 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .work_item_id, .work_group_size, .work_group_id, + .legalize_vec_elem_val, + .legalize_vec_store_elem, => {}, } } } -const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, shuffle_one, shuffle_two, select }; -/// inline to propagate comptime-known `replaceInst` result. 
-inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Tag {
-    return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, form));
-}
-fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Data {
+const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, select };
+fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, form: ScalarizeForm) Error!Air.Inst.Data {
     const pt = l.pt;
     const zcu = pt.zcu;
-    const gpa = zcu.gpa;
 
     const orig = l.air_instructions.get(@intFromEnum(orig_inst));
     const res_ty = l.typeOfIndex(orig_inst);
-    const res_len = res_ty.vectorLen(zcu);
+    const result_is_array = switch (res_ty.zigTypeTag(zcu)) {
+        .vector => false,
+        .array => true,
+        else => unreachable,
+    };
+    const res_len = res_ty.arrayLen(zcu);
+    const res_elem_ty = res_ty.childType(zcu);
 
-    const inst_per_elem = switch (form) {
+    if (result_is_array) {
+        // This is only allowed when legalizing an elementwise bitcast.
+        assert(orig.tag == .bitcast);
+        assert(form == .ty_op);
+    }
+
+    // Our output will be a loop doing elementwise stores:
+    //
+    // %1 = block(@Vector(N, Scalar), {
+    //   %2 = alloc(*usize)
+    //   %3 = alloc(*@Vector(N, Scalar))
+    //   %4 = store(%2, @zero_usize)
+    //   %5 = loop({
+    //     %6 = load(%2)
+    //     %7 = <the scalarized operation applied to element %6 of the operands>
+    //     %8 = legalize_vec_store_elem(%3, %6, %7)
+    //     %9 = cmp_eq(%6, <N - 1>)
+    //     %10 = cond_br(%9, {
+    //       %11 = load(%3)
+    //       %12 = br(%1, %11)
+    //     }, {
+    //       %13 = add(%6, @one_usize)
+    //       %14 = store(%2, %13)
+    //       %15 = repeat(%5)
+    //     })
+    //   })
+    // })
+    //
+    // If scalarizing an elementwise bitcast, the result might be an array, in which case
+    // `legalize_vec_store_elem` becomes two instructions (`ptr_elem_ptr` and `store`).
+    // Therefore, there are 13 or 14 instructions in the block, plus however many are
+    // needed to compute each result element for `form`.
+    const inst_per_form: usize = switch (form) {
         .un_op, .ty_op => 2,
         .bin_op, .cmp_vector => 3,
         .pl_op_bin => 4,
-        .shuffle_one, .shuffle_two => 1,
         .select => 7,
     };
+    const max_inst_per_form = 7; // maximum value in the above switch
+    var inst_buf: [14 + max_inst_per_form]Air.Inst.Index = undefined;
 
-    var sfba_state = std.heap.stackFallback(@sizeOf([inst_per_elem * 32 + 2]Air.Inst.Index) + @sizeOf([32]Air.Inst.Ref), gpa);
-    const sfba = sfba_state.get();
+    var main_block: Block = .init(&inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
 
-    // Plus 2 extra instructions for `aggregate_init` and `br`.
-    const inst_buf = try sfba.alloc(Air.Inst.Index, inst_per_elem * res_len + 2);
-    defer sfba.free(inst_buf);
+    const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+    const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(res_ty)).toRef();
 
-    var main_block: Block = .init(inst_buf);
-    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
+    _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
 
-    const elem_buf = try sfba.alloc(Air.Inst.Ref, res_len);
-    defer sfba.free(elem_buf);
+    var loop: Loop = .init(l, &main_block);
+    loop.block = .init(main_block.stealRemainingCapacity());
 
-    switch (form) {
-        .un_op => {
+    const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
+    const elem_val: Air.Inst.Ref = switch (form) {
+        .un_op => elem: {
             const orig_operand = orig.data.un_op;
-            const un_op_tag = orig.tag;
-            for (elem_buf, 0..)
|*elem, elem_idx| { - const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); - const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef(); - elem.* = main_block.addUnOp(l, un_op_tag, operand).toRef(); - } + const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef(); + break :elem loop.block.addUnOp(l, orig.tag, operand).toRef(); }, - .ty_op => { + .ty_op => elem: { const orig_operand = orig.data.ty_op.operand; - const orig_ty: Type = .fromInterned(orig.data.ty_op.ty.toInterned().?); - const scalar_ty = orig_ty.childType(zcu); - const ty_op_tag = orig.tag; - for (elem_buf, 0..) |*elem, elem_idx| { - const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); - const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef(); - elem.* = main_block.addTyOp(l, ty_op_tag, scalar_ty, operand).toRef(); - } + const operand_is_array = switch (l.typeOf(orig_operand).zigTypeTag(zcu)) { + .vector => false, + .array => true, + else => unreachable, + }; + const operand = loop.block.addBinOp( + l, + if (operand_is_array) .array_elem_val else .legalize_vec_elem_val, + orig_operand, + index_val, + ).toRef(); + break :elem loop.block.addTyOp(l, orig.tag, res_elem_ty, operand).toRef(); }, - .bin_op => { - const orig_operands = orig.data.bin_op; - const bin_op_tag = orig.tag; - for (elem_buf, 0..) |*elem, elem_idx| { - const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); - const lhs = main_block.addBinOp(l, .array_elem_val, orig_operands.lhs, elem_idx_ref).toRef(); - const rhs = main_block.addBinOp(l, .array_elem_val, orig_operands.rhs, elem_idx_ref).toRef(); - elem.* = main_block.addBinOp(l, bin_op_tag, lhs, rhs).toRef(); - } + .bin_op => elem: { + const orig_bin = orig.data.bin_op; + const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef(); + break :elem loop.block.addBinOp(l, orig.tag, lhs, rhs).toRef(); }, - .pl_op_bin => { + .pl_op_bin => elem: { const orig_operand = orig.data.pl_op.operand; - const orig_payload = l.extraData(Air.Bin, orig.data.pl_op.payload).data; - const pl_op_tag = orig.tag; - for (elem_buf, 0..) 
|*elem, elem_idx| { - const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); - const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef(); - const lhs = main_block.addBinOp(l, .array_elem_val, orig_payload.lhs, elem_idx_ref).toRef(); - const rhs = main_block.addBinOp(l, .array_elem_val, orig_payload.rhs, elem_idx_ref).toRef(); - elem.* = main_block.add(l, .{ - .tag = pl_op_tag, - .data = .{ .pl_op = .{ - .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), - .operand = operand, - } }, - }).toRef(); - } + const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data; + const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef(); + const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef(); + break :elem loop.block.add(l, .{ + .tag = orig.tag, + .data = .{ .pl_op = .{ + .operand = operand, + .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), + } }, + }).toRef(); }, - .cmp_vector => { + .cmp_vector => elem: { const orig_payload = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data; const cmp_op = orig_payload.compareOperator(); const optimized = switch (orig.tag) { @@ -772,116 +831,393 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: .cmp_vector_optimized => true, else => unreachable, }; - for (elem_buf, 0..) |*elem, elem_idx| { - const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); - const lhs = main_block.addBinOp(l, .array_elem_val, orig_payload.lhs, elem_idx_ref).toRef(); - const rhs = main_block.addBinOp(l, .array_elem_val, orig_payload.rhs, elem_idx_ref).toRef(); - elem.* = main_block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef(); - } - }, - .shuffle_one => { - const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst); - for (elem_buf, shuffle.mask) |*elem, mask| elem.* = switch (mask.unwrap()) { - .value => |val| .fromIntern(val), - .elem => |src_idx| elem: { - const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx)); - break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand, src_idx_ref).toRef(); - }, - }; + const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.rhs, index_val).toRef(); + break :elem loop.block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef(); }, - .shuffle_two => { - const shuffle = l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst); - const scalar_ty = res_ty.childType(zcu); - for (elem_buf, shuffle.mask) |*elem, mask| elem.* = switch (mask.unwrap()) { - .undef => .fromValue(try pt.undefValue(scalar_ty)), - .a_elem => |src_idx| elem: { - const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx)); - break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand_a, src_idx_ref).toRef(); - }, - .b_elem => |src_idx| elem: { - const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx)); - break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand_b, src_idx_ref).toRef(); - }, - }; - }, - .select => { + .select => elem: { const orig_cond = orig.data.pl_op.operand; const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data; - const res_scalar_ty = res_ty.childType(zcu); - for (elem_buf, 0..) 
|*elem, elem_idx| { - // Payload to be populated later; we need the index early for `br`s. - const elem_block_inst = main_block.add(l, .{ - .tag = .block, - .data = .{ .ty_pl = .{ - .ty = .fromType(res_scalar_ty), - .payload = undefined, - } }, - }); - var elem_block: Block = .init(main_block.stealCapacity(2)); - const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); - const cond = elem_block.addBinOp(l, .array_elem_val, orig_cond, elem_idx_ref).toRef(); - var condbr: CondBr = .init(l, cond, &elem_block, .{}); + const elem_block_inst = loop.block.add(l, .{ + .tag = .block, + .data = .{ .ty_pl = .{ + .ty = .fromType(res_elem_ty), + .payload = undefined, + } }, + }); + var elem_block: Block = .init(loop.block.stealCapacity(2)); + const cond = elem_block.addBinOp(l, .legalize_vec_elem_val, orig_cond, index_val).toRef(); + + var condbr: CondBr = .init(l, cond, &elem_block, .{}); - condbr.then_block = .init(main_block.stealCapacity(2)); - const lhs = condbr.then_block.addBinOp(l, .array_elem_val, orig_bin.lhs, elem_idx_ref).toRef(); - condbr.then_block.addBr(l, elem_block_inst, lhs); + condbr.then_block = .init(loop.block.stealCapacity(2)); + const lhs = condbr.then_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef(); + condbr.then_block.addBr(l, elem_block_inst, lhs); - condbr.else_block = .init(main_block.stealCapacity(2)); - const rhs = condbr.else_block.addBinOp(l, .array_elem_val, orig_bin.rhs, elem_idx_ref).toRef(); - condbr.else_block.addBr(l, elem_block_inst, rhs); + condbr.else_block = .init(loop.block.stealCapacity(2)); + const rhs = condbr.else_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef(); + condbr.else_block.addBr(l, elem_block_inst, rhs); - try condbr.finish(l); + try condbr.finish(l); - const inst_data = l.air_instructions.items(.data); - inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body()); + const inst_data = l.air_instructions.items(.data); + inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body()); - elem.* = elem_block_inst.toRef(); - } + break :elem elem_block_inst.toRef(); }, + }; + _ = loop.block.stealCapacity(max_inst_per_form - inst_per_form); + if (result_is_array) { + const elem_ptr = loop.block.add(l, .{ + .tag = .ptr_elem_ptr, + .data = .{ .ty_pl = .{ + .ty = .fromType(try pt.singleMutPtrType(res_elem_ty)), + .payload = try l.addExtra(Air.Bin, .{ + .lhs = result_ptr, + .rhs = index_val, + }), + } }, + }).toRef(); + _ = loop.block.addBinOp(l, .store, elem_ptr, elem_val); + } else { + _ = loop.block.add(l, .{ + .tag = .legalize_vec_store_elem, + .data = .{ .pl_op = .{ + .operand = result_ptr, + .payload = try l.addExtra(Air.Bin, .{ + .lhs = index_val, + .rhs = elem_val, + }), + } }, + }); + _ = loop.block.stealCapacity(1); } + const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, res_len - 1))).toRef(); - const result = main_block.add(l, .{ - .tag = .aggregate_init, - .data = .{ .ty_pl = .{ - .ty = .fromType(res_ty), - .payload = payload: { - const idx = l.air_extra.items.len; - try l.air_extra.appendSlice(gpa, @ptrCast(elem_buf)); - break :payload @intCast(idx); - }, - } }, - }).toRef(); + var condbr: CondBr = .init(l, is_end_val, &loop.block, .{}); + condbr.then_block = .init(loop.block.stealRemainingCapacity()); + const result_val = condbr.then_block.addTyOp(l, .load, res_ty, result_ptr).toRef(); + condbr.then_block.addBr(l, orig_inst, result_val); - 
main_block.addBr(l, orig_inst, result);
+    condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
+    const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
+    _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
+    _ = condbr.else_block.add(l, .{
+        .tag = .repeat,
+        .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+    });
 
-    // Some `form` values may intentionally not use the full instruction buffer.
-    switch (form) {
-        .un_op,
-        .ty_op,
-        .bin_op,
-        .pl_op_bin,
-        .cmp_vector,
-        .select,
-        => {},
-        .shuffle_one,
-        .shuffle_two,
-        => _ = main_block.stealRemainingCapacity(),
-    }
+    try condbr.finish(l);
+
+    try loop.finish(l);
 
     return .{ .ty_pl = .{
         .ty = .fromType(res_ty),
         .payload = try l.addBlockBody(main_block.body()),
     } };
 }
-fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data {
+fn scalarizeShuffleOneBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
     const pt = l.pt;
     const zcu = pt.zcu;
     const gpa = zcu.gpa;
 
+    const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst);
+
+    // We're going to emit something like this:
+    //
+    //   var x: @Vector(N, T) = all_comptime_known_elems;
+    //   for (out_idxs, in_idxs) |i, j| x[i] = operand[j];
+    //
+    // So we must first compute `out_idxs` and `in_idxs`.
+
     var sfba_state = std.heap.stackFallback(512, gpa);
     const sfba = sfba_state.get();
 
+    const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(out_idxs_buf);
+
+    const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(in_idxs_buf);
+
+    var n: usize = 0;
+    for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
+        .value => {},
+        .elem => |in_idx| {
+            out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
+            in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
+            n += 1;
+        },
+    };
+
+    const init_val: Value = init: {
+        const undef_val = try pt.undefValue(shuffle.result_ty.childType(zcu));
+        const elems = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+        defer sfba.free(elems);
+        for (shuffle.mask, elems) |mask, *elem| elem.* = switch (mask.unwrap()) {
+            .value => |ip_index| ip_index,
+            .elem => undef_val.toIntern(),
+        };
+        break :init try pt.aggregateValue(shuffle.result_ty, elems);
+    };
+
+    // %1 = block(@Vector(N, T), {
+    //   %2 = alloc(*@Vector(N, T))
+    //   %3 = alloc(*usize)
+    //   %4 = store(%2, <init_val>)
+    //   %5 = [addScalarizedShuffle]
+    //   %6 = load(%2)
+    //   %7 = br(%1, %6)
+    // })
+
+    var inst_buf: [6]Air.Inst.Index = undefined;
+    var main_block: Block = .init(&inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(gpa, 19);
+
+    const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
+    const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+
+    _ = main_block.addBinOp(l, .store, result_ptr, .fromValue(init_val));
+
+    try l.addScalarizedShuffle(
+        &main_block,
+        shuffle.operand,
+        result_ptr,
+        index_ptr,
+        out_idxs_buf[0..n],
+        in_idxs_buf[0..n],
+    );
+
+    const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
+    main_block.addBr(l, orig_inst, result_val);
+
+    return .{ .ty_pl = .{
+        .ty = .fromType(shuffle.result_ty),
+        .payload = try l.addBlockBody(main_block.body()),
+    } };
+}
+fn scalarizeShuffleTwoBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
+    const pt = l.pt;
+    const zcu = pt.zcu;
+    const gpa = zcu.gpa;
+
+    const shuffle = l.getTmpAir().unwrapShuffleTwo(zcu,
orig_inst);
+
+    // We're going to emit something like this:
+    //
+    //   var x: @Vector(N, T) = undefined;
+    //   for (out_idxs_a, in_idxs_a) |i, j| x[i] = operand_a[j];
+    //   for (out_idxs_b, in_idxs_b) |i, j| x[i] = operand_b[j];
+    //
+    // The AIR will look like this:
+    //
+    // %1 = block(@Vector(N, T), {
+    //   %2 = alloc(*@Vector(N, T))
+    //   %3 = alloc(*usize)
+    //   %4 = store(%2, <@Vector(N, T), undefined>)
+    //   %5 = [addScalarizedShuffle]
+    //   %6 = [addScalarizedShuffle]
+    //   %7 = load(%2)
+    //   %8 = br(%1, %7)
+    // })
+
+    var sfba_state = std.heap.stackFallback(512, gpa);
+    const sfba = sfba_state.get();
+
+    const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(out_idxs_buf);
+
+    const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(in_idxs_buf);
+
+    // Iterate `shuffle.mask` before doing anything, because modifying AIR invalidates it.
+    const out_idxs_a, const in_idxs_a, const out_idxs_b, const in_idxs_b = idxs: {
+        var n: usize = 0;
+        for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
+            .undef, .b_elem => {},
+            .a_elem => |in_idx| {
+                out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
+                in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
+                n += 1;
+            },
+        };
+        const a_len = n;
+        for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
+            .undef, .a_elem => {},
+            .b_elem => |in_idx| {
+                out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
+                in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
+                n += 1;
+            },
+        };
+        break :idxs .{
+            out_idxs_buf[0..a_len],
+            in_idxs_buf[0..a_len],
+            out_idxs_buf[a_len..n],
+            in_idxs_buf[a_len..n],
+        };
+    };
+
+    var inst_buf: [7]Air.Inst.Index = undefined;
+    var main_block: Block = .init(&inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(gpa, 33);
+
+    const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
+    const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+
+    _ = main_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.undefValue(shuffle.result_ty)));
+
+    if (out_idxs_a.len == 0) {
+        _ = main_block.stealCapacity(1);
+    } else {
+        try l.addScalarizedShuffle(
+            &main_block,
+            shuffle.operand_a,
+            result_ptr,
+            index_ptr,
+            out_idxs_a,
+            in_idxs_a,
+        );
+    }
+
+    if (out_idxs_b.len == 0) {
+        _ = main_block.stealCapacity(1);
+    } else {
+        try l.addScalarizedShuffle(
+            &main_block,
+            shuffle.operand_b,
+            result_ptr,
+            index_ptr,
+            out_idxs_b,
+            in_idxs_b,
+        );
+    }
+
+    const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
+    main_block.addBr(l, orig_inst, result_val);
+
+    return .{ .ty_pl = .{
+        .ty = .fromType(shuffle.result_ty),
+        .payload = try l.addBlockBody(main_block.body()),
+    } };
+}
+/// Adds code to `parent_block` which behaves like this loop:
+///
+///   for (out_idxs, in_idxs) |i, j| result_vec_ptr[i] = operand_vec[j];
+///
+/// The actual AIR adds exactly one instruction to `parent_block` itself and 14 instructions
+/// overall, and is as follows:
+///
+/// %1 = block(void, {
+///   %2 = store(index_ptr, @zero_usize)
+///   %3 = loop({
+///     %4 = load(index_ptr)
+///     %5 = ptr_elem_val(out_idxs_ptr, %4)
+///     %6 = ptr_elem_val(in_idxs_ptr, %4)
+///     %7 = legalize_vec_elem_val(operand_vec, %6)
+///     %8 = legalize_vec_store_elem(result_vec_ptr, %5, %7)
+///     %9 = cmp_eq(%4, <n - 1>)
+///     %10 = cond_br(%9, {
+///       %11 = br(%1, @void_value)
+///     }, {
+///       %12 = add(%4, @one_usize)
+///       %13 = store(index_ptr, %12)
+///       %14 = repeat(%3)
+///
}) +/// }) +/// }) +/// +/// The caller is responsible for reserving space in `l.air_instructions`. +fn addScalarizedShuffle( + l: *Legalize, + parent_block: *Block, + operand_vec: Air.Inst.Ref, + result_vec_ptr: Air.Inst.Ref, + index_ptr: Air.Inst.Ref, + out_idxs: []const InternPool.Index, + in_idxs: []const InternPool.Index, +) Error!void { + const pt = l.pt; + + assert(out_idxs.len == in_idxs.len); + const n = out_idxs.len; + + const idxs_ty = try pt.arrayType(.{ .len = n, .child = .usize_type }); + const idxs_ptr_ty = try pt.singleConstPtrType(idxs_ty); + const manyptr_usize_ty = try pt.manyConstPtrType(.usize); + + const out_idxs_ptr = try pt.intern(.{ .ptr = .{ + .ty = manyptr_usize_ty.toIntern(), + .base_addr = .{ .uav = .{ + .val = (try pt.aggregateValue(idxs_ty, out_idxs)).toIntern(), + .orig_ty = idxs_ptr_ty.toIntern(), + } }, + .byte_offset = 0, + } }); + const in_idxs_ptr = try pt.intern(.{ .ptr = .{ + .ty = manyptr_usize_ty.toIntern(), + .base_addr = .{ .uav = .{ + .val = (try pt.aggregateValue(idxs_ty, in_idxs)).toIntern(), + .orig_ty = idxs_ptr_ty.toIntern(), + } }, + .byte_offset = 0, + } }); + + const main_block_inst = parent_block.add(l, .{ + .tag = .block, + .data = .{ .ty_pl = .{ + .ty = .void_type, + .payload = undefined, + } }, + }); + + var inst_buf: [13]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + + _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize); + + var loop: Loop = .init(l, &main_block); + loop.block = .init(main_block.stealRemainingCapacity()); + + const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef(); + const in_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(in_idxs_ptr), index_val).toRef(); + const out_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(out_idxs_ptr), index_val).toRef(); + + const elem_val = loop.block.addBinOp(l, .legalize_vec_elem_val, operand_vec, in_idx_val).toRef(); + _ = loop.block.add(l, .{ + .tag = .legalize_vec_store_elem, + .data = .{ .pl_op = .{ + .operand = result_vec_ptr, + .payload = try l.addExtra(Air.Bin, .{ + .lhs = out_idx_val, + .rhs = elem_val, + }), + } }, + }); + + const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, n - 1))).toRef(); + var condbr: CondBr = .init(l, is_end_val, &loop.block, .{}); + condbr.then_block = .init(loop.block.stealRemainingCapacity()); + condbr.then_block.addBr(l, main_block_inst, .void_value); + + condbr.else_block = .init(condbr.then_block.stealRemainingCapacity()); + const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef(); + _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val); + _ = condbr.else_block.add(l, .{ + .tag = .repeat, + .data = .{ .repeat = .{ .loop_inst = loop.inst } }, + }); + + try condbr.finish(l); + try loop.finish(l); + + const inst_data = l.air_instructions.items(.data); + inst_data[@intFromEnum(main_block_inst)].ty_pl.payload = try l.addBlockBody(main_block.body()); +} +fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data { + const pt = l.pt; + const zcu = pt.zcu; + const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; const dest_ty = ty_op.ty.toType(); @@ -920,72 +1256,204 @@ fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!? 
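+    // As a rough source-level sketch of the two conversions this function emits
+    // (an illustrative sketch only, not the emitted AIR; `uN` stands for an
+    // unsigned integer of the full bit size, `E` for the element bit width, and
+    // `uE` for the per-element unsigned integer type, so element 0 ends up in
+    // the least significant bits):
+    //
+    //   var acc: uN = 0;
+    //   var i: usize = len;
+    //   while (i > 0) {
+    //       i -= 1;
+    //       acc = (acc << E) | @as(uN, @as(uE, @bitCast(operand[i])));
+    //   }
+    //   // ...then `acc` is split back out into `dest_ty` with shr/trunc/bitcast.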
     const uint_ty = try pt.intType(.unsigned, num_bits);
     const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits));
 
-    const inst_buf = try sfba.alloc(Air.Inst.Index, len: {
-        const operand_to_uint_len: u64 = if (operand_legal) 1 else (operand_ty.arrayLen(zcu) * 5);
-        const uint_to_dest_len: u64 = if (dest_legal) 1 else (dest_ty.arrayLen(zcu) * 3 + 1);
-        break :len @intCast(operand_to_uint_len + uint_to_dest_len + 1);
-    });
-    defer sfba.free(inst_buf);
-    var main_block: Block = .init(inst_buf);
-    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
+    var inst_buf: [39]Air.Inst.Index = undefined;
+    var main_block: Block = .init(&inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
 
     // First, convert `operand_ty` to `uint_ty` (`uN`).
     const uint_val: Air.Inst.Ref = uint_val: {
-        if (operand_legal) break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand);
-
-        const bits_per_elem: u16 = @intCast(operand_ty.childType(zcu).bitSize(zcu));
-        const bits_per_elem_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, bits_per_elem));
-        const elem_uint_ty = try pt.intType(.unsigned, bits_per_elem);
-
-        var cur_uint: Air.Inst.Ref = .fromValue(try pt.intValue(uint_ty, 0));
-        var elem_idx = operand_ty.arrayLen(zcu);
-        while (elem_idx > 0) {
-            elem_idx -= 1;
-            const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
-            const orig_elem = main_block.addBinOp(l, .array_elem_val, ty_op.operand, elem_idx_ref).toRef();
-            const elem_as_uint = main_block.addBitCast(l, elem_uint_ty, orig_elem);
-            const elem_extended = main_block.addTyOp(l, .intcast, uint_ty, elem_as_uint).toRef();
-            cur_uint = main_block.addBinOp(l, .shl_exact, cur_uint, bits_per_elem_ref).toRef();
-            cur_uint = main_block.addBinOp(l, .bit_or, cur_uint, elem_extended).toRef();
+        if (operand_legal) {
+            _ = main_block.stealCapacity(19);
+            break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand);
         }
-        break :uint_val cur_uint;
+
+        // %1 = block({
+        //   %2 = alloc(*usize)
+        //   %3 = alloc(*uN)
+        //   %4 = store(%2, <len - 1>)
+        //   %5 = store(%3, <0>)
+        //   %6 = loop({
+        //     %7 = load(%2)
+        //     %8 = array_elem_val(orig_operand, %7)
+        //     %9 = bitcast(uE, %8)
+        //     %10 = intcast(uN, %9)
+        //     %11 = load(%3)
+        //     %12 = shl_exact(%11, <elem_bits>)
+        //     %13 = bit_or(%12, %10)
+        //     %14 = cmp_eq(%7, @zero_usize)
+        //     %15 = cond_br(%14, {
+        //       %16 = br(%1, %13)
+        //     }, {
+        //       %17 = store(%3, %13)
+        //       %18 = sub(%7, @one_usize)
+        //       %19 = store(%2, %18)
+        //       %20 = repeat(%6)
+        //     })
+        //   })
+        // })
+
+        const elem_bits = operand_ty.childType(zcu).bitSize(zcu);
+        const elem_bits_val = try pt.intValue(shift_ty, elem_bits);
+        const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
+
+        const uint_block_inst = main_block.add(l, .{
+            .tag = .block,
+            .data = .{ .ty_pl = .{
+                .ty = .fromType(uint_ty),
+                .payload = undefined,
+            } },
+        });
+        var uint_block: Block = .init(main_block.stealCapacity(19));
+
+        const index_ptr = uint_block.addTy(l, .alloc, .ptr_usize).toRef();
+        const result_ptr = uint_block.addTy(l, .alloc, try pt.singleMutPtrType(uint_ty)).toRef();
+        _ = uint_block.addBinOp(
+            l,
+            .store,
+            index_ptr,
+            .fromValue(try pt.intValue(.usize, operand_ty.arrayLen(zcu) - 1)),
+        );
+        _ = uint_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.intValue(uint_ty, 0)));
+
+        var loop: Loop = .init(l, &uint_block);
+        loop.block = .init(uint_block.stealRemainingCapacity());
+
+        const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
+        const raw_elem = loop.block.addBinOp(
+            l,
+            if (operand_ty.zigTypeTag(zcu) == .vector) .legalize_vec_elem_val else .array_elem_val,
+            ty_op.operand,
+            index_val,
+        ).toRef();
+        const elem_uint = loop.block.addBitCast(l, elem_uint_ty, raw_elem);
+        const elem_extended = loop.block.addTyOp(l, .intcast, uint_ty, elem_uint).toRef();
+        const old_result = loop.block.addTyOp(l, .load, uint_ty, result_ptr).toRef();
+        const shifted_result = loop.block.addBinOp(l, .shl_exact, old_result, .fromValue(elem_bits_val)).toRef();
+        const new_result = loop.block.addBinOp(l, .bit_or, shifted_result, elem_extended).toRef();
+
+        const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .zero_usize).toRef();
+        var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
+
+        condbr.then_block = .init(loop.block.stealRemainingCapacity());
+        condbr.then_block.addBr(l, uint_block_inst, new_result);
+
+        condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
+        _ = condbr.else_block.addBinOp(l, .store, result_ptr, new_result);
+        const new_index_val = condbr.else_block.addBinOp(l, .sub, index_val, .one_usize).toRef();
+        _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
+        _ = condbr.else_block.add(l, .{
+            .tag = .repeat,
+            .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+        });
+
+        try condbr.finish(l);
+        try loop.finish(l);
+
+        const inst_data = l.air_instructions.items(.data);
+        inst_data[@intFromEnum(uint_block_inst)].ty_pl.payload = try l.addBlockBody(uint_block.body());
+
+        break :uint_val uint_block_inst.toRef();
     };
 
     // Now convert `uint_ty` (`uN`) to `dest_ty`.
-    const result: Air.Inst.Ref = result: {
-        if (dest_legal) break :result main_block.addBitCast(l, dest_ty, uint_val);
+    if (dest_legal) {
+        _ = main_block.stealCapacity(17);
+        const result = main_block.addBitCast(l, dest_ty, uint_val);
+        main_block.addBr(l, orig_inst, result);
+    } else {
+        // %1 = alloc(*usize)
+        // %2 = alloc(*@Vector(N, Result))
+        // %3 = store(%1, @zero_usize)
+        // %4 = loop({
+        //   %5 = load(%1)
+        //   %6 = mul(%5, <elem_bits>)
+        //   %7 = intcast(uS, %6)
+        //   %8 = shr(uint_val, %7)
+        //   %9 = trunc(uE, %8)
+        //   %10 = bitcast(Result, %9)
+        //   %11 = legalize_vec_store_elem(%2, %5, %10)
+        //   %12 = cmp_eq(%5, <N - 1>)
+        //   %13 = cond_br(%12, {
+        //     %14 = load(%2)
+        //     %15 = br(%0, %14)
+        //   }, {
+        //     %16 = add(%5, @one_usize)
+        //     %17 = store(%1, %16)
+        //     %18 = repeat(%4)
+        //   })
+        // })
+        //
+        // The result might be an array, in which case `legalize_vec_store_elem`
+        // becomes `ptr_elem_ptr` followed by `store`.
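+        //
+        // As a source-level sketch of this unpacking loop (illustrative only;
+        // `N`, `E`, `uE`, and `Result` stand for the destination length, element
+        // bit width, element integer type, and element type):
+        //
+        //   var result: @Vector(N, Result) = undefined;
+        //   var i: usize = 0;
+        //   while (true) {
+        //       const elem_bits: uE = @truncate(uint_val >> @intCast(i * E));
+        //       result[i] = @bitCast(elem_bits);
+        //       if (i == N - 1) break;
+        //       i += 1;
+        //   }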
         const elem_ty = dest_ty.childType(zcu);
-        const bits_per_elem: u16 = @intCast(elem_ty.bitSize(zcu));
-        const bits_per_elem_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, bits_per_elem));
-        const elem_uint_ty = try pt.intType(.unsigned, bits_per_elem);
-
-        const elem_buf = try sfba.alloc(Air.Inst.Ref, dest_ty.arrayLen(zcu));
-        defer sfba.free(elem_buf);
-
-        var cur_uint = uint_val;
-        for (elem_buf) |*elem| {
-            const elem_as_uint = main_block.addTyOp(l, .trunc, elem_uint_ty, cur_uint).toRef();
-            elem.* = main_block.addBitCast(l, elem_ty, elem_as_uint);
-            cur_uint = main_block.addBinOp(l, .shr, cur_uint, bits_per_elem_ref).toRef();
+        const elem_bits = elem_ty.bitSize(zcu);
+        const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
+
+        const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+        const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(dest_ty)).toRef();
+        _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
+
+        var loop: Loop = .init(l, &main_block);
+        loop.block = .init(main_block.stealRemainingCapacity());
+
+        const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
+        const bit_offset = loop.block.addBinOp(l, .mul, index_val, .fromValue(try pt.intValue(.usize, elem_bits))).toRef();
+        const casted_bit_offset = loop.block.addTyOp(l, .intcast, shift_ty, bit_offset).toRef();
+        const shifted_uint = loop.block.addBinOp(l, .shr, uint_val, casted_bit_offset).toRef();
+        const elem_uint = loop.block.addTyOp(l, .trunc, elem_uint_ty, shifted_uint).toRef();
+        const elem_val = loop.block.addBitCast(l, elem_ty, elem_uint);
+        switch (dest_ty.zigTypeTag(zcu)) {
+            .array => {
+                const elem_ptr = loop.block.add(l, .{
+                    .tag = .ptr_elem_ptr,
+                    .data = .{ .ty_pl = .{
+                        .ty = .fromType(try pt.singleMutPtrType(elem_ty)),
+                        .payload = try l.addExtra(Air.Bin, .{
+                            .lhs = result_ptr,
+                            .rhs = index_val,
+                        }),
+                    } },
+                }).toRef();
+                _ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
+            },
+            .vector => {
+                _ = loop.block.add(l, .{
+                    .tag = .legalize_vec_store_elem,
+                    .data = .{ .pl_op = .{
+                        .operand = result_ptr,
+                        .payload = try l.addExtra(Air.Bin, .{
+                            .lhs = index_val,
+                            .rhs = elem_val,
+                        }),
+                    } },
+                });
+                _ = loop.block.stealCapacity(1);
+            },
+            else => unreachable,
         }
-        break :result main_block.add(l, .{
-            .tag = .aggregate_init,
-            .data = .{ .ty_pl = .{
-                .ty = .fromType(dest_ty),
-                .payload = payload: {
-                    const idx = l.air_extra.items.len;
-                    try l.air_extra.appendSlice(gpa, @ptrCast(elem_buf));
-                    break :payload @intCast(idx);
-                },
-            } },
-        }).toRef();
-    };
+        const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, dest_ty.arrayLen(zcu) - 1))).toRef();
 
-    main_block.addBr(l, orig_inst, result);
+        var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
+
+        condbr.then_block = .init(loop.block.stealRemainingCapacity());
+        const result_val = condbr.then_block.addTyOp(l, .load, dest_ty, result_ptr).toRef();
+        condbr.then_block.addBr(l, orig_inst, result_val);
+
+        condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
+        const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
+        _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
+        _ = condbr.else_block.add(l, .{
+            .tag = .repeat,
+            .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+        });
+
+        try condbr.finish(l);
+        try loop.finish(l);
+    }
 
     return .{ .ty_pl = .{
         .ty = .fromType(dest_ty),
@@ -995,10 +1463,6 @@ fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst:
Air.Inst.Index) Error!?
 fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
     const pt = l.pt;
     const zcu = pt.zcu;
-    const gpa = zcu.gpa;
-
-    var sfba_state = std.heap.stackFallback(512, gpa);
-    const sfba = sfba_state.get();
 
     const orig = l.air_instructions.get(@intFromEnum(orig_inst));
     const orig_operands = l.extraData(Air.Bin, orig.data.ty_pl.payload).data;
@@ -1015,89 +1479,127 @@ fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!
     const scalar_int_ty = vec_int_ty.childType(zcu);
     const scalar_tuple_ty = try pt.overflowArithmeticTupleType(scalar_int_ty);
 
-    const elems_len = vec_int_ty.vectorLen(zcu);
-
-    const inst_buf = try sfba.alloc(Air.Inst.Index, 5 * elems_len + 4);
-    defer sfba.free(inst_buf);
+    // %1 = block(struct { @Vector(N, Int), @Vector(N, u1) }, {
+    //   %2 = alloc(*usize)
+    //   %3 = alloc(*struct { @Vector(N, Int), @Vector(N, u1) })
+    //   %4 = struct_field_ptr_index_0(*@Vector(N, Int), %3)
+    //   %5 = struct_field_ptr_index_1(*@Vector(N, u1), %3)
+    //   %6 = store(%2, @zero_usize)
+    //   %7 = loop({
+    //     %8 = load(%2)
+    //     %9 = legalize_vec_elem_val(orig_lhs, %8)
+    //     %10 = legalize_vec_elem_val(orig_rhs, %8)
+    //     %11 = ???_with_overflow(struct { Int, u1 }, %9, %10)
+    //     %12 = struct_field_val(%11, 0)
+    //     %13 = struct_field_val(%11, 1)
+    //     %14 = legalize_vec_store_elem(%4, %8, %12)
+    //     %15 = legalize_vec_store_elem(%5, %8, %13)
+    //     %16 = cmp_eq(%8, <N - 1>)
+    //     %17 = cond_br(%16, {
+    //       %18 = load(%3)
+    //       %19 = br(%1, %18)
+    //     }, {
+    //       %20 = add(%8, @one_usize)
+    //       %21 = store(%2, %20)
+    //       %22 = repeat(%7)
+    //     })
+    //   })
+    // })
 
-    var main_block: Block = .init(inst_buf);
-    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
+    const elems_len = vec_int_ty.vectorLen(zcu);
 
-    const int_elem_buf = try sfba.alloc(Air.Inst.Ref, elems_len);
-    defer sfba.free(int_elem_buf);
-    const overflow_elem_buf = try sfba.alloc(Air.Inst.Ref, elems_len);
-    defer sfba.free(overflow_elem_buf);
-
-    for (int_elem_buf, overflow_elem_buf, 0..)
|*int_elem, *overflow_elem, elem_idx| { - const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx)); - const lhs = main_block.addBinOp(l, .array_elem_val, orig_operands.lhs, elem_idx_ref).toRef(); - const rhs = main_block.addBinOp(l, .array_elem_val, orig_operands.rhs, elem_idx_ref).toRef(); - const elem_result = main_block.add(l, .{ - .tag = orig.tag, - .data = .{ .ty_pl = .{ - .ty = .fromType(scalar_tuple_ty), - .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), - } }, - }).toRef(); - int_elem.* = main_block.add(l, .{ - .tag = .struct_field_val, - .data = .{ .ty_pl = .{ - .ty = .fromType(scalar_int_ty), - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = elem_result, - .field_index = 0, - }), - } }, - }).toRef(); - overflow_elem.* = main_block.add(l, .{ - .tag = .struct_field_val, - .data = .{ .ty_pl = .{ - .ty = .bool_type, - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = elem_result, - .field_index = 1, - }), - } }, - }).toRef(); - } + var inst_buf: [21]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); - const int_vec = main_block.add(l, .{ - .tag = .aggregate_init, + const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef(); + const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(vec_tuple_ty)).toRef(); + const result_int_ptr = main_block.addTyOp( + l, + .struct_field_ptr_index_0, + try pt.singleMutPtrType(vec_int_ty), + result_ptr, + ).toRef(); + const result_overflow_ptr = main_block.addTyOp( + l, + .struct_field_ptr_index_1, + try pt.singleMutPtrType(vec_overflow_ty), + result_ptr, + ).toRef(); + + _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize); + + var loop: Loop = .init(l, &main_block); + loop.block = .init(main_block.stealRemainingCapacity()); + + const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef(); + const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.rhs, index_val).toRef(); + const elem_result = loop.block.add(l, .{ + .tag = orig.tag, .data = .{ .ty_pl = .{ - .ty = .fromType(vec_int_ty), - .payload = payload: { - const idx = l.air_extra.items.len; - try l.air_extra.appendSlice(gpa, @ptrCast(int_elem_buf)); - break :payload @intCast(idx); - }, + .ty = .fromType(scalar_tuple_ty), + .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), } }, }).toRef(); - const overflow_vec = main_block.add(l, .{ - .tag = .aggregate_init, + const int_elem = loop.block.add(l, .{ + .tag = .struct_field_val, .data = .{ .ty_pl = .{ - .ty = .fromType(vec_overflow_ty), - .payload = payload: { - const idx = l.air_extra.items.len; - try l.air_extra.appendSlice(gpa, @ptrCast(overflow_elem_buf)); - break :payload @intCast(idx); - }, + .ty = .fromType(scalar_int_ty), + .payload = try l.addExtra(Air.StructField, .{ + .struct_operand = elem_result, + .field_index = 0, + }), } }, }).toRef(); - - const tuple_elems: [2]Air.Inst.Ref = .{ int_vec, overflow_vec }; - const result = main_block.add(l, .{ - .tag = .aggregate_init, + const overflow_elem = loop.block.add(l, .{ + .tag = .struct_field_val, .data = .{ .ty_pl = .{ - .ty = .fromType(vec_tuple_ty), - .payload = payload: { - const idx = l.air_extra.items.len; - try l.air_extra.appendSlice(gpa, @ptrCast(&tuple_elems)); - break :payload @intCast(idx); - }, + .ty = .u1_type, + .payload = try 
l.addExtra(Air.StructField, .{
+                .struct_operand = elem_result,
+                .field_index = 1,
+            }),
+        } },
+    }).toRef();
+    _ = loop.block.add(l, .{
+        .tag = .legalize_vec_store_elem,
+        .data = .{ .pl_op = .{
+            .operand = result_int_ptr,
+            .payload = try l.addExtra(Air.Bin, .{
+                .lhs = index_val,
+                .rhs = int_elem,
+            }),
+        } },
+    });
+    _ = loop.block.add(l, .{
+        .tag = .legalize_vec_store_elem,
+        .data = .{ .pl_op = .{
+            .operand = result_overflow_ptr,
+            .payload = try l.addExtra(Air.Bin, .{
+                .lhs = index_val,
+                .rhs = overflow_elem,
+            }),
+        } },
+    });
 
-    main_block.addBr(l, orig_inst, result);
+    const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, elems_len - 1))).toRef();
+    var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
+
+    condbr.then_block = .init(loop.block.stealRemainingCapacity());
+    const result_val = condbr.then_block.addTyOp(l, .load, vec_tuple_ty, result_ptr).toRef();
+    condbr.then_block.addBr(l, orig_inst, result_val);
+
+    condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
+    const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
+    _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
+    _ = condbr.else_block.add(l, .{
+        .tag = .repeat,
+        .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+    });
+
+    try condbr.finish(l);
+    try loop.finish(l);
 
     return .{ .ty_pl = .{
         .ty = .fromType(vec_tuple_ty),
@@ -1288,7 +1790,7 @@ fn safeIntFromFloatBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, optimiz
 
     // We emit 9 instructions in the worst case.
     var inst_buf: [9]Air.Inst.Index = undefined;
-    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
+    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
     var main_block: Block = .init(&inst_buf);
 
     // This check is a bit annoying because of floating-point rounding and the fact that this
@@ -1771,6 +2273,9 @@ const Block = struct {
             .data = .{ .br = .{ .block_inst = target, .operand = operand } },
         });
     }
+    fn addTy(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type) Air.Inst.Index {
+        return b.add(l, .{ .tag = tag, .data = .{ .ty = ty } });
+    }
     fn addBinOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) Air.Inst.Index {
         return b.add(l, .{
             .tag = tag,
@@ -1921,6 +2426,31 @@ const Block = struct {
     }
 };
 
+const Loop = struct {
+    inst: Air.Inst.Index,
+    block: Block,
+
+    /// The return value has `block` initialized to `undefined`; it is the caller's responsibility
+    /// to initialize it.
+    fn init(l: *Legalize, parent_block: *Block) Loop {
+        return .{
+            .inst = parent_block.add(l, .{
+                .tag = .loop,
+                .data = .{ .ty_pl = .{
+                    .ty = .noreturn_type,
+                    .payload = undefined,
+                } },
+            }),
+            .block = undefined,
+        };
+    }
+
+    fn finish(loop: Loop, l: *Legalize) Error!void {
+        const data = &l.air_instructions.items(.data)[@intFromEnum(loop.inst)];
+        data.ty_pl.payload = try l.addBlockBody(loop.block.body());
+    }
+};
+
 const CondBr = struct {
     inst: Air.Inst.Index,
     hints: Air.CondBr.BranchHints,
diff --git a/src/Air/Liveness.zig b/src/Air/Liveness.zig
index ea170d0893d6..c60ece5e4f8d 100644
--- a/src/Air/Liveness.zig
+++ b/src/Air/Liveness.zig
@@ -458,6 +458,7 @@ fn analyzeInst(
         .memset_safe,
         .memcpy,
         .memmove,
+        .legalize_vec_elem_val,
         => {
             const o = inst_datas[@intFromEnum(inst)].bin_op;
             return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none });
@@ -769,6 +770,12 @@ fn analyzeInst(
             const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
             return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, .none, .none });
         },
+
+        .legalize_vec_store_elem => {
+            const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
+            const bin = a.air.extraData(Air.Bin, pl_op.payload).data;
+            return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, bin.lhs, bin.rhs });
+        },
     }
 }
 
diff --git a/src/Air/Liveness/Verify.zig b/src/Air/Liveness/Verify.zig
index 2f50937bbe40..f522e1367e49 100644
--- a/src/Air/Liveness/Verify.zig
+++ b/src/Air/Liveness/Verify.zig
@@ -272,6 +272,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
             .memset_safe,
             .memcpy,
             .memmove,
+            .legalize_vec_elem_val,
             => {
                 const bin_op = data[@intFromEnum(inst)].bin_op;
                 try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none });
@@ -577,6 +578,11 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
                 try self.verifyInst(inst);
             },
+
+            .legalize_vec_store_elem => {
+                const pl_op = data[@intFromEnum(inst)].pl_op;
+                const bin = self.air.extraData(Air.Bin, pl_op.payload).data;
+                try self.verifyInstOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs });
+            },
         }
     }
 }
diff --git a/src/Air/print.zig b/src/Air/print.zig
index 4b44af3206b8..3324055dc70a 100644
--- a/src/Air/print.zig
+++ b/src/Air/print.zig
@@ -171,6 +171,7 @@ const Writer = struct {
             .memmove,
             .memset,
             .memset_safe,
+            .legalize_vec_elem_val,
             => try w.writeBinOp(s, inst),
 
             .is_null,
@@ -331,6 +332,7 @@ const Writer = struct {
             .reduce, .reduce_optimized => try w.writeReduce(s, inst),
             .cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
             .runtime_nav_ptr => try w.writeRuntimeNavPtr(s, inst),
+            .legalize_vec_store_elem => try w.writeLegalizeVecStoreElem(s, inst),
 
             .work_item_id,
             .work_group_size,
@@ -508,6 +510,17 @@ const Writer = struct {
         try w.writeOperand(s, inst, 2, pl_op.operand);
     }
 
+    fn writeLegalizeVecStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
+        const pl_op = w.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
+        const bin = w.air.extraData(Air.Bin, pl_op.payload).data;
+
+        try w.writeOperand(s, inst, 0, pl_op.operand);
+        try s.writeAll(", ");
+        try w.writeOperand(s, inst, 1, bin.lhs);
+        try s.writeAll(", ");
+        try w.writeOperand(s, inst, 2, bin.rhs);
+    }
+
     fn writeShuffleOne(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
         const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst);
         try w.writeType(s, unwrapped.result_ty);
diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig
index cac981cb00d2..d90550982d83 100644
---
a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -88,6 +88,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .atomic_store_monotonic, .atomic_store_release, .atomic_store_seq_cst, + .legalize_vec_elem_val, => { if (!checkRef(data.bin_op.lhs, zcu)) return false; if (!checkRef(data.bin_op.rhs, zcu)) return false; @@ -322,6 +323,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .select, .mul_add, + .legalize_vec_store_elem, => { const bin = air.extraData(Air.Bin, data.pl_op.payload).data; if (!checkRef(data.pl_op.operand, zcu)) return false; diff --git a/src/Sema.zig b/src/Sema.zig index 8b5c7c5de22b..7974b4791388 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -15930,16 +15930,21 @@ fn zirOverflowArithmetic( } } // If either of the arguments is one, the result is the other and no overflow occured. - const scalar_one = try pt.intValue(dest_ty.scalarType(zcu), 1); - const vec_one = try sema.splat(dest_ty, scalar_one); - if (maybe_lhs_val) |lhs_val| { - if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; + const dest_scalar_ty = dest_ty.scalarType(zcu); + const dest_scalar_int = dest_scalar_ty.intInfo(zcu); + // We could still be working with i1, where '1' is not a legal value! + if (!(dest_scalar_int.bits == 1 and dest_scalar_int.signedness == .signed)) { + const scalar_one = try pt.intValue(dest_scalar_ty, 1); + const vec_one = try sema.splat(dest_ty, scalar_one); + if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; + } } - } - if (maybe_rhs_val) |rhs_val| { - if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; + if (maybe_rhs_val) |rhs_val| { + if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; + } } } diff --git a/src/codegen/aarch64/Select.zig b/src/codegen/aarch64/Select.zig index 36ca69e589ee..64aeeb7ff48b 100644 --- a/src/codegen/aarch64/Select.zig +++ b/src/codegen/aarch64/Select.zig @@ -134,6 +134,10 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void { var air_inst_index = air_body[air_body_index]; const initial_def_order_len = isel.def_order.count(); air_tag: switch (air_tags[@intFromEnum(air_inst_index)]) { + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .arg, .ret_addr, .frame_addr, @@ -950,6 +954,11 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory, }; air_tag: switch (air.next().?) { else => |air_tag| return isel.fail("unimplemented {t}", .{air_tag}), + + // No "scalarize" legalizations are enabled, so these instructions never appear. 
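+            // (For reference: per the Liveness/print changes in this patch,
+            // `legalize_vec_elem_val` is a `bin_op` of (vector, index) yielding one
+            // element, and `legalize_vec_store_elem` is a `pl_op` whose `Air.Bin`
+            // payload holds (index, elem) and which performs
+            // `vector_ptr.*[index] = elem`; a backend that did enable the
+            // "scalarize" legalizations would lower them much like
+            // `array_elem_val` and an elementwise store.)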
+ .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .arg => { const arg_vi = isel.live_values.fetchRemove(air.inst_index).?.value; defer arg_vi.deref(isel); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 0abea3d50396..a19c4bb34681 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3325,6 +3325,10 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void { // zig fmt: off .inferred_alloc, .inferred_alloc_comptime => unreachable, + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .arg => try airArg(f, inst), .breakpoint => try airBreakpoint(f), diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 1160c2958e6c..b862a23ddc97 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -4886,6 +4886,11 @@ pub const FuncGen = struct { const val: Builder.Value = switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off + + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .add => try self.airAdd(inst, .normal), .add_optimized => try self.airAdd(inst, .fast), .add_wrap => try self.airAddWrap(inst), diff --git a/src/codegen/riscv64/CodeGen.zig b/src/codegen/riscv64/CodeGen.zig index bf5e5b6718a0..cdca3c2fd8bd 100644 --- a/src/codegen/riscv64/CodeGen.zig +++ b/src/codegen/riscv64/CodeGen.zig @@ -1391,6 +1391,11 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { const tag = air_tags[@intFromEnum(inst)]; switch (tag) { // zig fmt: off + + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .add, .add_wrap, .sub, diff --git a/src/codegen/sparc64/CodeGen.zig b/src/codegen/sparc64/CodeGen.zig index 684bfcfabb01..4cbe07c76279 100644 --- a/src/codegen/sparc64/CodeGen.zig +++ b/src/codegen/sparc64/CodeGen.zig @@ -479,6 +479,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off + + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .ptr_add => try self.airPtrArithmetic(inst, .ptr_add), .ptr_sub => try self.airPtrArithmetic(inst, .ptr_sub), diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig index b7f7aa151daf..684513bf8281 100644 --- a/src/codegen/wasm/CodeGen.zig +++ b/src/codegen/wasm/CodeGen.zig @@ -1786,6 +1786,10 @@ fn buildPointerOffset(cg: *CodeGen, ptr_value: WValue, offset: u64, action: enum fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { const air_tags = cg.air.instructions.items(.tag); return switch (air_tags[@intFromEnum(inst)]) { + // No "scalarize" legalizations are enabled, so these instructions never appear. 
+ .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .inferred_alloc, .inferred_alloc_comptime => unreachable, .add => cg.airBinOp(inst, .add), diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig index 94394185bd58..f0772dcd73c3 100644 --- a/src/codegen/x86_64/CodeGen.zig +++ b/src/codegen/x86_64/CodeGen.zig @@ -103926,7 +103926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ops[0].toOffset(0, cg); try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg); }, - .array_elem_val => { + .array_elem_val, .legalize_vec_elem_val => { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const array_ty = cg.typeOf(bin_op.lhs); const res_ty = array_ty.elemType2(zcu); @@ -173061,6 +173061,634 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .c_va_copy => try cg.airVaCopy(inst), .c_va_end => try cg.airVaEnd(inst), .c_va_start => try cg.airVaStart(inst), + .legalize_vec_store_elem => { + const pl_op = air_datas[@intFromEnum(inst)].pl_op; + const bin = cg.air.extraData(Air.Bin, pl_op.payload).data; + // vector_ptr, index, elem_val + var ops = try cg.tempsFromOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs }); + cg.select(&.{}, &.{}, &ops, comptime &.{ .{ + .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } }, + }, + .extra_temps = .{ + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, + .{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ }, + .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } }, + }, + .extra_temps = .{ + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, + .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, + .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, null, null, null }, + .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .extra_temps = .{ + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ }, + .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ }, + .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .extra_temps = .{ + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + 
}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ }, + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ }, + .{ ._, ._mp, .j, .@"1f", ._, ._, ._ }, + .{ .@"0:", ._s, .bt, .tmp0d, .src1d, ._, ._ }, + .{ .@"1:", ._, .mov, .lea(.src0b), .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } }, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } }, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._s, .bt, .lea(.src0d), .src1d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, null, null, null }, + .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .lea(.src0w), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ }, + .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ }, + .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .lea(.src0w), .tmp0w, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ }, + .{ ._, ._mp, .j, .@"0f", ._, ._, ._ }, + .{ .@"1:", ._s, .bt, .lea(.src0w), .src1w, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } }, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bt, .lea(.src0d), .src1d, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } }, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._s, .bt, .lea(.src0d), .src1d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, null, null, null }, + .src_constraints = .{ .{ .ptr_bool_vec = .dword }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .lea(.src0d), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ }, + .{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ }, + .{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ }, + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, 
.lea(.src0d), .tmp0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, null, null }, + .src_constraints = .{ .{ .ptr_bool_vec = .qword }, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .lea(.src0q), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .tmp0q, ._, ._ }, + .{ ._, ._r, .bt, .tmp1q, .src1q, ._, ._ }, + .{ ._, ._s, .bt, .tmp0q, .src1q, ._, ._ }, + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .lea(.src0q), .tmp0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, null, null, null }, + .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .src1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp0d, .ui(5), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .leasi(.src0d, .@"4", .tmp0), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._r, .bt, .tmp2d, .src1d, ._, ._ }, + .{ ._, ._s, .bt, .tmp1d, .src1d, ._, ._ }, + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._z, .cmov, .tmp1d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .leasi(.src0d, .@"4", .tmp0), .tmp1d, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .ptr_any_bool_vec, .any, .bool }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"test", .src2b, .si(1), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._r, .bt, .lea(.src0d), .src1d, ._, ._ }, + .{ ._, ._mp, .j, .@"0f", ._, ._, ._ }, + .{ .@"1:", ._s, .bt, .lea(.src0d), .src1d, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any, .any, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .imm8 } }, + .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leaa(.src0b, .add_src0_elem_size_mul_src1), .src2b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any, .any, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .imm8 } }, + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leai(.src0b, .src1), .src2b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any, .any, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .imm16 } }, + .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2w, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any, .any, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .imm16 } }, + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .src2w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ 
.float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .vp_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .p_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ .once = &.{ + .{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ }, + .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ }, + .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp1w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .vp_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .p_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ .once = &.{ + .{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ }, + .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ }, + .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp1w, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any, .any, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .imm32 } 
}, + .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2d, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any, .any, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .imm32 } }, + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leasi(.src0d, .@"4", .src1), .src2d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .v_ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .v_ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .any, .any, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .simm32 } }, + .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .any, .any, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, + }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .leasi(.src0q, .@"8", .src1), .src2q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .v_sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._ps, .movl, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .v_sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, + } }, + }, .{ + 
.required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._ps, .movl, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => { + const elem_size = cg.typeOf(bin.rhs).abiSize(zcu); + while (try ops[0].toRegClass(true, .general_purpose, cg) or + try ops[1].toRegClass(true, .general_purpose, cg)) + {} + const base_reg = ops[0].tracking(cg).short.register.to64(); + const rhs_reg = ops[1].tracking(cg).short.register.to64(); + if (!std.math.isPowerOfTwo(elem_size)) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + rhs_reg, + rhs_reg, + .u(elem_size), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ + .base = .{ .reg = base_reg }, + .mod = .{ .rm = .{ .index = rhs_reg } }, + }); + } else if (elem_size > 8) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._l, .sh }, + rhs_reg, + .u(std.math.log2_int(u64, elem_size)), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ + .base = .{ .reg = base_reg }, + .mod = .{ .rm = .{ .index = rhs_reg } }, + }); + } else try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ + .base = .{ .reg = base_reg }, + .mod = .{ .rm = .{ + .index = rhs_reg, + .scale = .fromFactor(@intCast(elem_size)), + } }, + }); + try ops[0].store(&ops[2], .{}, cg); + }, + else => |e| return e, + }; + for (ops) |op| try op.die(cg); + }, .work_item_id, .work_group_size, .work_group_id => unreachable, } try cg.resetTemps(@enumFromInt(0)); From 5df5e2ed267deba810811831060a6e1a3593b0f5 Mon Sep 17 00:00:00 2001 From: Matthew Lugg Date: Sun, 9 Nov 2025 20:46:06 +0000 Subject: [PATCH 7/8] zig.h: drop dependency on deleted compiler_rt functions It turns out we did use these in the C backend. However, it's really just as easy, if not easier, to replicate the logic directly in C. Synchronizes stage1/zig.h to make sure the bootstrap doesn't depend on these functions either. The actual zig1 tarball is unmodified because regenerating it is unnecessary in this instance. 
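
For reference, the branchless overflow checks this introduces are the
standard sign-bit identities. A minimal standalone sketch of the
addition case (not part of the patch; `check_add_i32` is a hypothetical
name used only for illustration):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* The addition itself must go through unsigned arithmetic, since
       signed overflow is undefined behavior in C; the cast back is
       implementation-defined but two's complement on every target the
       C backend supports. */
    static bool check_add_i32(int32_t lhs, int32_t rhs, int32_t *res) {
        *res = (int32_t)((uint32_t)lhs + (uint32_t)rhs);
        /* Overflow happened iff the result's sign differs from the sign
           of *both* operands, which is only possible when the operands
           agreed in sign to begin with. */
        return ((*res ^ lhs) & (*res ^ rhs)) < 0;
    }

    int main(void) {
        int32_t r;
        assert(check_add_i32(INT32_MAX, 1, &r) && r == INT32_MIN);
        assert(!check_add_i32(-5, 3, &r) && r == -2);
        return 0;
    }

The subtraction variants use the same idea with
`((lhs ^ rhs) & (full_res ^ lhs)) < 0`: subtraction can only overflow
when the operands differ in sign, and it did overflow exactly when the
result's sign disagrees with `lhs`.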
--- lib/zig.h | 36 +++++++++++--------------------- stage1/zig.h | 59 ++++++++++++++++++++++------------------------------ 2 files changed, 37 insertions(+), 58 deletions(-) diff --git a/lib/zig.h b/lib/zig.h index 7fc86df5c41a..7a7a22b17ba1 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -809,15 +809,13 @@ static inline bool zig_addo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8 #endif } -zig_extern int32_t __addosi4(int32_t lhs, int32_t rhs, int *overflow); static inline bool zig_addo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gcc) int32_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int32_t full_res = __addosi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int32_t full_res = (int32_t)((uint32_t)lhs + (uint32_t)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i32(full_res, bits); return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); @@ -835,15 +833,13 @@ static inline bool zig_addo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8 #endif } -zig_extern int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow); static inline bool zig_addo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gcc) int64_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int64_t full_res = __addodi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int64_t full_res = (int64_t)((uint64_t)lhs + (uint64_t)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i64(full_res, bits); return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); @@ -917,15 +913,13 @@ static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8 #endif } -zig_extern int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow); static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gcc) int32_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int32_t full_res = __subosi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int32_t full_res = (int32_t)((uint32_t)lhs - (uint32_t)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i32(full_res, bits); return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); @@ -943,15 +937,13 @@ static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8 #endif } -zig_extern int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow); static inline bool zig_subo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gcc) int64_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int64_t full_res = __subodi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int64_t full_res = (int64_t)((uint64_t)lhs - (uint64_t)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i64(full_res, bits); return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); @@ -1755,15 +1747,13 @@ static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint #endif } -zig_extern zig_i128 
__addoti4(zig_i128 lhs, zig_i128 rhs, int *overflow); static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) zig_i128 full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + zig_i128 full_res = (zig_i128)((zig_u128)lhs + (zig_u128)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i128(full_res, bits); return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits); @@ -1781,15 +1771,13 @@ static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint #endif } -zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, int *overflow); static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) zig_i128 full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + zig_i128 full_res = (zig_i128)((zig_u128)lhs - (zig_u128)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i128(full_res, bits); return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits); diff --git a/stage1/zig.h b/stage1/zig.h index 5253912490ce..baae5926101b 100644 --- a/stage1/zig.h +++ b/stage1/zig.h @@ -40,6 +40,8 @@ #elif defined(__mips__) #define zig_mips32 #define zig_mips +#elif defined(__or1k__) +#define zig_or1k #elif defined(__powerpc64__) #define zig_powerpc64 #define zig_powerpc @@ -72,6 +74,9 @@ #elif defined (__x86_64__) || (defined(zig_msvc) && defined(_M_X64)) #define zig_x86_64 #define zig_x86 +#elif defined(__I86__) +#define zig_x86_16 +#define zig_x86 #endif #if defined(zig_msvc) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ @@ -82,9 +87,7 @@ #define zig_big_endian 1 #endif -#if defined(_AIX) -#define zig_aix -#elif defined(__MACH__) +#if defined(__MACH__) #define zig_darwin #elif defined(__DragonFly__) #define zig_dragonfly @@ -114,20 +117,14 @@ #define zig_wasi #elif defined(_WIN32) #define zig_windows -#elif defined(__MVS__) -#define zig_zos #endif #if defined(zig_windows) #define zig_coff #elif defined(__ELF__) #define zig_elf -#elif defined(zig_zos) -#define zig_goff #elif defined(zig_darwin) #define zig_macho -#elif defined(zig_aix) -#define zig_xcoff #endif #define zig_concat(lhs, rhs) lhs##rhs @@ -390,12 +387,16 @@ #define zig_trap() __asm__ volatile(".word 0x0") #elif defined(zig_mips) #define zig_trap() __asm__ volatile(".word 0x3d") +#elif defined(zig_or1k) +#define zig_trap() __asm__ volatile("l.cust8") #elif defined(zig_riscv) #define zig_trap() __asm__ volatile("unimp") #elif defined(zig_s390x) #define zig_trap() __asm__ volatile("j 0x2") #elif defined(zig_sparc) #define zig_trap() __asm__ volatile("illtrap") +#elif defined(zig_x86_16) +#define zig_trap() __asm__ volatile("int $0x3") #elif defined(zig_x86) #define zig_trap() __asm__ volatile("ud2") #else @@ -422,6 +423,8 @@ #define zig_breakpoint() __asm__ volatile("break 0x0") #elif defined(zig_mips) #define zig_breakpoint() __asm__ volatile("break") +#elif defined(zig_or1k) +#define zig_breakpoint() __asm__ volatile("l.trap 0x0") #elif defined(zig_powerpc) #define zig_breakpoint() __asm__ volatile("trap") #elif defined(zig_riscv) @@ -804,15 +807,13 @@ static inline bool zig_addo_u32(uint32_t 
*res, uint32_t lhs, uint32_t rhs, uint8 #endif } -zig_extern int32_t __addosi4(int32_t lhs, int32_t rhs, int *overflow); static inline bool zig_addo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gcc) int32_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int32_t full_res = __addosi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int32_t full_res = (int32_t)((uint32_t)lhs + (uint32_t)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i32(full_res, bits); return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); @@ -830,15 +831,13 @@ static inline bool zig_addo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8 #endif } -zig_extern int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow); static inline bool zig_addo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gcc) int64_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int64_t full_res = __addodi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int64_t full_res = (int64_t)((uint64_t)lhs + (uint64_t)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i64(full_res, bits); return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); @@ -912,15 +911,13 @@ static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8 #endif } -zig_extern int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow); static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gcc) int32_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int32_t full_res = __subosi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int32_t full_res = (int32_t)((uint32_t)lhs - (uint32_t)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i32(full_res, bits); return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); @@ -938,15 +935,13 @@ static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8 #endif } -zig_extern int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow); static inline bool zig_subo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gcc) int64_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int64_t full_res = __subodi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int64_t full_res = (int64_t)((uint64_t)lhs - (uint64_t)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i64(full_res, bits); return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); @@ -1750,15 +1745,13 @@ static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint #endif } -zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, int *overflow); static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) zig_i128 full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int); - bool 
overflow = overflow_int != 0;
+    zig_i128 full_res = (zig_i128)((zig_u128)lhs + (zig_u128)rhs);
+    bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
 #endif
     *res = zig_wrap_i128(full_res, bits);
     return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
@@ -1776,15 +1769,13 @@ static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint
 #endif
 }

-zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, int *overflow);
 static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow)
     zig_i128 full_res;
     bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    zig_i128 full_res = (zig_i128)((zig_u128)lhs - (zig_u128)rhs);
+    bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
 #endif
     *res = zig_wrap_i128(full_res, bits);
     return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
@@ -4213,7 +4204,7 @@ static inline void zig_loongarch_cpucfg(uint32_t word, uint32_t* result) {
 #endif
 }

-#elif defined(zig_x86)
+#elif defined(zig_x86) && !defined(zig_x86_16)

 static inline void zig_x86_cpuid(uint32_t leaf_id, uint32_t subid, uint32_t* eax, uint32_t* ebx, uint32_t* ecx, uint32_t* edx) {
 #if defined(zig_msvc)

From 532aa3c5758f110eb7cf0992eb394088ab563899 Mon Sep 17 00:00:00 2001
From: Matthew Lugg
Date: Mon, 10 Nov 2025 12:12:37 +0000
Subject: [PATCH 8/8] cbe: work around some miscompilations

The changes to `src/codegen/c.zig` are blatant hacks, but the problem
they work around isn't a regression: it's an existing miscompilation.
This branch happened to *expose* that miscompilation in more cases by
changing how an incorrect result is *used*.
---
 src/Type.zig            |  2 +-
 src/codegen/c.zig       | 53 ++++++++++++++++++++++++++++++++++++++++-
 test/behavior/union.zig |  2 +-
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/src/Type.zig b/src/Type.zig
index d6e38420cb4e..b111650e3454 100644
--- a/src/Type.zig
+++ b/src/Type.zig
@@ -3556,7 +3556,7 @@ pub fn packedStructFieldPtrInfo(
     } else .{
         switch (zcu.comp.getZigBackend()) {
             else => (running_bits + 7) / 8,
-            .stage2_x86_64 => @intCast(struct_ty.abiSize(zcu)),
+            .stage2_x86_64, .stage2_c => @intCast(struct_ty.abiSize(zcu)),
         },
         bit_offset,
     };
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index a19c4bb34681..e3b33beb146c 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -3801,6 +3801,24 @@ fn airAlloc(f: *Function, inst: Air.Inst.Index) !CValue {
     });
     log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local });
     try f.allocs.put(zcu.gpa, local.new_local, true);
+
+    switch (elem_ty.zigTypeTag(zcu)) {
+        .@"struct", .@"union" => switch (elem_ty.containerLayout(zcu)) {
+            .@"packed" => {
+                // For packed aggregates, we zero-initialize to try and work around a design flaw
+                // related to how `packed`, `undefined`, and RLS interact. See comment in `airStore`
+                // for details.
+ const w = &f.object.code.writer; + try w.print("memset(&t{d}, 0x00, sizeof(", .{local.new_local}); + try f.renderType(w, elem_ty); + try w.writeAll("));"); + try f.object.newline(); + }, + .auto, .@"extern" => {}, + }, + else => {}, + } + return .{ .local_ref = local.new_local }; } @@ -3820,6 +3838,24 @@ fn airRetPtr(f: *Function, inst: Air.Inst.Index) !CValue { }); log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local }); try f.allocs.put(zcu.gpa, local.new_local, true); + + switch (elem_ty.zigTypeTag(zcu)) { + .@"struct", .@"union" => switch (elem_ty.containerLayout(zcu)) { + .@"packed" => { + // For packed aggregates, we zero-initialize to try and work around a design flaw + // related to how `packed`, `undefined`, and RLS interact. See comment in `airStore` + // for details. + const w = &f.object.code.writer; + try w.print("memset(&t{d}, 0x00, sizeof(", .{local.new_local}); + try f.renderType(w, elem_ty); + try w.writeAll("));"); + try f.object.newline(); + }, + .auto, .@"extern" => {}, + }, + else => {}, + } + return .{ .local_ref = local.new_local }; } @@ -4098,9 +4134,24 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { if (val_is_undef) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); if (safety and ptr_info.packed_offset.host_size == 0) { + // If the thing we're initializing is a packed struct/union, we set to 0 instead of + // 0xAA. This is a hack to work around a problem with partially-undefined packed + // aggregates. If we used 0xAA here, then a later initialization through RLS would + // not zero the high padding bits (for a packed type which is not 8/16/32/64/etc bits), + // so we would get a miscompilation. Using 0x00 here avoids this bug in some cases. It + // is *not* a correct fix; for instance it misses any case where packed structs are + // nested in other aggregates. A proper fix for this will involve changing the language, + // such as to remove RLS. This just prevents miscompilations in *some* common cases. + const byte_str: []const u8 = switch (src_ty.zigTypeTag(zcu)) { + else => "0xaa", + .@"struct", .@"union" => switch (src_ty.containerLayout(zcu)) { + .auto, .@"extern" => "0xaa", + .@"packed" => "0x00", + }, + }; try w.writeAll("memset("); try f.writeCValue(w, ptr_val, .FunctionArgument); - try w.writeAll(", 0xaa, sizeof("); + try w.print(", {s}, sizeof(", .{byte_str}); try f.renderType(w, .fromInterned(ptr_info.child)); try w.writeAll("));"); try f.object.newline(); diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 11356c09b76c..115c43fbd803 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -1547,7 +1547,7 @@ test "packed union field pointer has correct alignment" { const host_size = switch (builtin.zig_backend) { else => comptime std.math.divCeil(comptime_int, @bitSizeOf(S), 8) catch unreachable, - .stage2_x86_64 => @sizeOf(S), + .stage2_x86_64, .stage2_c => @sizeOf(S), }; comptime assert(@TypeOf(ap) == *align(4:2:host_size) u20); comptime assert(@TypeOf(bp) == *align(1:2:host_size) u20);
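
For anyone trying to reproduce the underlying bug, the failure mode the
new `airStore` comment describes boils down to the following. This is a
hand-written C sketch of what the emitted code effectively does, not
output of the C backend; the 20-bit payload in a 32-bit backing integer
(and the local name `t0`) are illustrative assumptions:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        /* Backing integer of a packed struct whose fields occupy only
           the low 20 bits; bits 20..31 are padding. */
        uint32_t t0;

        /* Safety-checked `= undefined` used to fill everything with
           0xAA, padding bits included. */
        memset(&t0, 0xaa, sizeof(t0));

        /* Field-by-field initialization through RLS writes only the
           value bits, so the padding keeps its 0xAA garbage. */
        t0 = (t0 & ~(uint32_t)0xfffff) | 0x12345;

        /* Anything that observes the full backing integer now sees
           0xaaa12345 instead of the expected 0x00012345. */
        printf("0x%08x\n", (unsigned)t0);
        return 0;
    }

Filling with 0x00 instead leaves the padding bits zeroed, matching what
a fully-initialized value looks like; as the comment says, this papers
over the common cases rather than fixing the design flaw itself.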