diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bab57bd1fc8..7090f8852768 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -211,10 +211,10 @@ set(ZIG_STAGE2_SOURCES lib/compiler_rt/absvti2.zig lib/compiler_rt/adddf3.zig lib/compiler_rt/addf3.zig - lib/compiler_rt/addo.zig lib/compiler_rt/addsf3.zig lib/compiler_rt/addtf3.zig lib/compiler_rt/addvsi3.zig + lib/compiler_rt/addvdi3.zig lib/compiler_rt/addxf3.zig lib/compiler_rt/arm.zig lib/compiler_rt/atomics.zig @@ -354,7 +354,6 @@ set(ZIG_STAGE2_SOURCES lib/compiler_rt/sqrt.zig lib/compiler_rt/stack_probe.zig lib/compiler_rt/subdf3.zig - lib/compiler_rt/subo.zig lib/compiler_rt/subsf3.zig lib/compiler_rt/subtf3.zig lib/compiler_rt/subvdi3.zig diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index aac81bf414f4..040d2c6c411f 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -28,12 +28,13 @@ comptime { _ = @import("compiler_rt/negv.zig"); _ = @import("compiler_rt/addvsi3.zig"); + _ = @import("compiler_rt/addvdi3.zig"); + _ = @import("compiler_rt/subvsi3.zig"); _ = @import("compiler_rt/subvdi3.zig"); + _ = @import("compiler_rt/mulvsi3.zig"); - _ = @import("compiler_rt/addo.zig"); - _ = @import("compiler_rt/subo.zig"); _ = @import("compiler_rt/mulo.zig"); // Float routines diff --git a/lib/compiler_rt/addo.zig b/lib/compiler_rt/addo.zig deleted file mode 100644 index 610d6206904b..000000000000 --- a/lib/compiler_rt/addo.zig +++ /dev/null @@ -1,46 +0,0 @@ -const std = @import("std"); -const common = @import("./common.zig"); -pub const panic = @import("common.zig").panic; - -comptime { - @export(&__addosi4, .{ .name = "__addosi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__addodi4, .{ .name = "__addodi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__addoti4, .{ .name = "__addoti4", .linkage = common.linkage, .visibility = common.visibility }); -} - -// addo - add overflow -// * return a+%b. -// * return if a+b overflows => 1 else => 0 -// - addoXi4_generic as default - -inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { - @setRuntimeSafety(common.test_safety); - overflow.* = 0; - const sum: ST = a +% b; - // Hackers Delight: section Overflow Detection, subsection Signed Add/Subtract - // Let sum = a +% b == a + b + carry == wraparound addition. - // Overflow in a+b+carry occurs, iff a and b have opposite signs - // and the sign of a+b+carry is the same as a (or equivalently b). 
- // Slower routine: res = ~(a ^ b) & ((sum ^ a) - // Faster routine: res = (sum ^ a) & (sum ^ b) - // Overflow occurred, iff (res < 0) - if (((sum ^ a) & (sum ^ b)) < 0) - overflow.* = 1; - return sum; -} - -pub fn __addosi4(a: i32, b: i32, overflow: *c_int) callconv(.c) i32 { - return addoXi4_generic(i32, a, b, overflow); -} -pub fn __addodi4(a: i64, b: i64, overflow: *c_int) callconv(.c) i64 { - return addoXi4_generic(i64, a, b, overflow); -} -pub fn __addoti4(a: i128, b: i128, overflow: *c_int) callconv(.c) i128 { - return addoXi4_generic(i128, a, b, overflow); -} - -test { - _ = @import("addosi4_test.zig"); - _ = @import("addodi4_test.zig"); - _ = @import("addoti4_test.zig"); -} diff --git a/lib/compiler_rt/addodi4_test.zig b/lib/compiler_rt/addodi4_test.zig deleted file mode 100644 index 92f8e9c1f26f..000000000000 --- a/lib/compiler_rt/addodi4_test.zig +++ /dev/null @@ -1,77 +0,0 @@ -const addv = @import("addo.zig"); -const std = @import("std"); -const testing = std.testing; -const math = std.math; - -fn test__addodi4(a: i64, b: i64) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = addv.__addodi4(a, b, &result_ov); - const expected: i64 = simple_addodi4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -fn simple_addodi4(a: i64, b: i64, overflow: *c_int) i64 { - overflow.* = 0; - const min: i64 = math.minInt(i64); - const max: i64 = math.maxInt(i64); - if (((a > 0) and (b > max - a)) or - ((a < 0) and (b < min - a))) - overflow.* = 1; - return a +% b; -} - -test "addodi4" { - const min: i64 = math.minInt(i64); - const max: i64 = math.maxInt(i64); - var i: i64 = 1; - while (i < max) : (i *|= 2) { - try test__addodi4(i, i); - try test__addodi4(-i, -i); - try test__addodi4(i, -i); - try test__addodi4(-i, i); - } - - // edge cases - // 0 + 0 = 0 - // MIN + MIN overflow - // MAX + MAX overflow - // 0 + MIN MIN - // 0 + MAX MAX - // MIN + 0 MIN - // MAX + 0 MAX - // MIN + MAX -1 - // MAX + MIN -1 - try test__addodi4(0, 0); - try test__addodi4(min, min); - try test__addodi4(max, max); - try test__addodi4(0, min); - try test__addodi4(0, max); - try test__addodi4(min, 0); - try test__addodi4(max, 0); - try test__addodi4(min, max); - try test__addodi4(max, min); - - // derived edge cases - // MIN+1 + MIN overflow - // MAX-1 + MAX overflow - // 1 + MIN = MIN+1 - // -1 + MIN overflow - // -1 + MAX = MAX-1 - // +1 + MAX overflow - // MIN + 1 = MIN+1 - // MIN + -1 overflow - // MAX + 1 overflow - // MAX + -1 = MAX-1 - try test__addodi4(min + 1, min); - try test__addodi4(max - 1, max); - try test__addodi4(1, min); - try test__addodi4(-1, min); - try test__addodi4(-1, max); - try test__addodi4(1, max); - try test__addodi4(min, 1); - try test__addodi4(min, -1); - try test__addodi4(max, -1); - try test__addodi4(max, 1); -} diff --git a/lib/compiler_rt/addosi4_test.zig b/lib/compiler_rt/addosi4_test.zig deleted file mode 100644 index 3494909f50a6..000000000000 --- a/lib/compiler_rt/addosi4_test.zig +++ /dev/null @@ -1,78 +0,0 @@ -const addv = @import("addo.zig"); -const testing = @import("std").testing; - -fn test__addosi4(a: i32, b: i32) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = addv.__addosi4(a, b, &result_ov); - const expected: i32 = simple_addosi4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -fn simple_addosi4(a: i32, b: i32, overflow: *c_int) i32 
{ - overflow.* = 0; - const min: i32 = -2147483648; - const max: i32 = 2147483647; - if (((a > 0) and (b > max - a)) or - ((a < 0) and (b < min - a))) - overflow.* = 1; - return a +% b; -} - -test "addosi4" { - // -2^31 <= i32 <= 2^31-1 - // 2^31 = 2147483648 - // 2^31-1 = 2147483647 - const min: i32 = -2147483648; - const max: i32 = 2147483647; - var i: i32 = 1; - while (i < max) : (i *|= 2) { - try test__addosi4(i, i); - try test__addosi4(-i, -i); - try test__addosi4(i, -i); - try test__addosi4(-i, i); - } - - // edge cases - // 0 + 0 = 0 - // MIN + MIN overflow - // MAX + MAX overflow - // 0 + MIN MIN - // 0 + MAX MAX - // MIN + 0 MIN - // MAX + 0 MAX - // MIN + MAX -1 - // MAX + MIN -1 - try test__addosi4(0, 0); - try test__addosi4(min, min); - try test__addosi4(max, max); - try test__addosi4(0, min); - try test__addosi4(0, max); - try test__addosi4(min, 0); - try test__addosi4(max, 0); - try test__addosi4(min, max); - try test__addosi4(max, min); - - // derived edge cases - // MIN+1 + MIN overflow - // MAX-1 + MAX overflow - // 1 + MIN = MIN+1 - // -1 + MIN overflow - // -1 + MAX = MAX-1 - // +1 + MAX overflow - // MIN + 1 = MIN+1 - // MIN + -1 overflow - // MAX + 1 overflow - // MAX + -1 = MAX-1 - try test__addosi4(min + 1, min); - try test__addosi4(max - 1, max); - try test__addosi4(1, min); - try test__addosi4(-1, min); - try test__addosi4(-1, max); - try test__addosi4(1, max); - try test__addosi4(min, 1); - try test__addosi4(min, -1); - try test__addosi4(max, -1); - try test__addosi4(max, 1); -} diff --git a/lib/compiler_rt/addoti4_test.zig b/lib/compiler_rt/addoti4_test.zig deleted file mode 100644 index d031d1d428a2..000000000000 --- a/lib/compiler_rt/addoti4_test.zig +++ /dev/null @@ -1,80 +0,0 @@ -const addv = @import("addo.zig"); -const builtin = @import("builtin"); -const std = @import("std"); -const testing = std.testing; -const math = std.math; - -fn test__addoti4(a: i128, b: i128) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = addv.__addoti4(a, b, &result_ov); - const expected: i128 = simple_addoti4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -fn simple_addoti4(a: i128, b: i128, overflow: *c_int) i128 { - overflow.* = 0; - const min: i128 = math.minInt(i128); - const max: i128 = math.maxInt(i128); - if (((a > 0) and (b > max - a)) or - ((a < 0) and (b < min - a))) - overflow.* = 1; - return a +% b; -} - -test "addoti4" { - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - - const min: i128 = math.minInt(i128); - const max: i128 = math.maxInt(i128); - var i: i128 = 1; - while (i < max) : (i *|= 2) { - try test__addoti4(i, i); - try test__addoti4(-i, -i); - try test__addoti4(i, -i); - try test__addoti4(-i, i); - } - - // edge cases - // 0 + 0 = 0 - // MIN + MIN overflow - // MAX + MAX overflow - // 0 + MIN MIN - // 0 + MAX MAX - // MIN + 0 MIN - // MAX + 0 MAX - // MIN + MAX -1 - // MAX + MIN -1 - try test__addoti4(0, 0); - try test__addoti4(min, min); - try test__addoti4(max, max); - try test__addoti4(0, min); - try test__addoti4(0, max); - try test__addoti4(min, 0); - try test__addoti4(max, 0); - try test__addoti4(min, max); - try test__addoti4(max, min); - - // derived edge cases - // MIN+1 + MIN overflow - // MAX-1 + MAX overflow - // 1 + MIN = MIN+1 - // -1 + MIN overflow - // -1 + MAX = MAX-1 - // +1 + MAX overflow - // MIN + 1 = MIN+1 - // MIN + -1 overflow - // MAX + 1 overflow - // MAX + -1 = MAX-1 - try 
test__addoti4(min + 1, min); - try test__addoti4(max - 1, max); - try test__addoti4(1, min); - try test__addoti4(-1, min); - try test__addoti4(-1, max); - try test__addoti4(1, max); - try test__addoti4(min, 1); - try test__addoti4(min, -1); - try test__addoti4(max, -1); - try test__addoti4(max, 1); -} diff --git a/lib/compiler_rt/addvdi3.zig b/lib/compiler_rt/addvdi3.zig new file mode 100644 index 000000000000..03aa9b91c717 --- /dev/null +++ b/lib/compiler_rt/addvdi3.zig @@ -0,0 +1,26 @@ +const common = @import("./common.zig"); +const testing = @import("std").testing; + +pub const panic = common.panic; + +comptime { + @export(&__addvdi3, .{ .name = "__addvdi3", .linkage = common.linkage, .visibility = common.visibility }); +} + +pub fn __addvdi3(a: i64, b: i64) callconv(.c) i64 { + const sum = a +% b; + // Overflow occurred iff both operands have the same sign, and the sign of the sum does + // not match it. In other words, iff the sum sign is not the sign of either operand. + if (((sum ^ a) & (sum ^ b)) < 0) @panic("compiler-rt: integer overflow"); + return sum; +} + +test "addvdi3" { + // const min: i64 = -9223372036854775808 + // const max: i64 = 9223372036854775807 + // TODO write panic handler for testing panics + // try test__addvdi3(-9223372036854775808, -1, -1); // panic + // try test__addvdi3(9223372036854775807, 1, 1); // panic + try testing.expectEqual(-9223372036854775808, __addvdi3(-9223372036854775807, -1)); + try testing.expectEqual(9223372036854775807, __addvdi3(9223372036854775806, 1)); +} diff --git a/lib/compiler_rt/addvsi3.zig b/lib/compiler_rt/addvsi3.zig index 04c19881bc01..e688fdba5844 100644 --- a/lib/compiler_rt/addvsi3.zig +++ b/lib/compiler_rt/addvsi3.zig @@ -1,4 +1,3 @@ -const addv = @import("addo.zig"); const common = @import("./common.zig"); const testing = @import("std").testing; @@ -9,9 +8,10 @@ comptime { } pub fn __addvsi3(a: i32, b: i32) callconv(.c) i32 { - var overflow: c_int = 0; - const sum = addv.__addosi4(a, b, &overflow); - if (overflow != 0) @panic("compiler-rt: integer overflow"); + const sum = a +% b; + // Overflow occurred iff both operands have the same sign, and the sign of the sum does + // not match it. In other words, iff the sum sign is not the sign of either operand. + if (((sum ^ a) & (sum ^ b)) < 0) @panic("compiler-rt: integer overflow"); return sum; } diff --git a/lib/compiler_rt/subo.zig b/lib/compiler_rt/subo.zig deleted file mode 100644 index b4fb8f77106a..000000000000 --- a/lib/compiler_rt/subo.zig +++ /dev/null @@ -1,47 +0,0 @@ -//! subo - subtract overflow -//! * return a-%b. -//! * return if a-b overflows => 1 else => 0 -//! 
- suboXi4_generic as default - -const std = @import("std"); -const builtin = @import("builtin"); -const common = @import("common.zig"); - -pub const panic = common.panic; - -comptime { - @export(&__subosi4, .{ .name = "__subosi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__subodi4, .{ .name = "__subodi4", .linkage = common.linkage, .visibility = common.visibility }); - @export(&__suboti4, .{ .name = "__suboti4", .linkage = common.linkage, .visibility = common.visibility }); -} - -pub fn __subosi4(a: i32, b: i32, overflow: *c_int) callconv(.c) i32 { - return suboXi4_generic(i32, a, b, overflow); -} -pub fn __subodi4(a: i64, b: i64, overflow: *c_int) callconv(.c) i64 { - return suboXi4_generic(i64, a, b, overflow); -} -pub fn __suboti4(a: i128, b: i128, overflow: *c_int) callconv(.c) i128 { - return suboXi4_generic(i128, a, b, overflow); -} - -inline fn suboXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { - overflow.* = 0; - const sum: ST = a -% b; - // Hackers Delight: section Overflow Detection, subsection Signed Add/Subtract - // Let sum = a -% b == a - b - carry == wraparound subtraction. - // Overflow in a-b-carry occurs, iff a and b have opposite signs - // and the sign of a-b-carry is opposite of a (or equivalently same as b). - // Faster routine: res = (a ^ b) & (sum ^ a) - // Slower routine: res = (sum^a) & ~(sum^b) - // Overflow occurred, iff (res < 0) - if (((a ^ b) & (sum ^ a)) < 0) - overflow.* = 1; - return sum; -} - -test { - _ = @import("subosi4_test.zig"); - _ = @import("subodi4_test.zig"); - _ = @import("suboti4_test.zig"); -} diff --git a/lib/compiler_rt/subodi4_test.zig b/lib/compiler_rt/subodi4_test.zig deleted file mode 100644 index 2dd717e14b4b..000000000000 --- a/lib/compiler_rt/subodi4_test.zig +++ /dev/null @@ -1,81 +0,0 @@ -const subo = @import("subo.zig"); -const std = @import("std"); -const testing = std.testing; -const math = std.math; - -fn test__subodi4(a: i64, b: i64) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = subo.__subodi4(a, b, &result_ov); - const expected: i64 = simple_subodi4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -// 2 cases on evaluating `a-b`: -// 1. 
`a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b -// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b -// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware -pub fn simple_subodi4(a: i64, b: i64, overflow: *c_int) i64 { - overflow.* = 0; - const min: i64 = math.minInt(i64); - const max: i64 = math.maxInt(i64); - if (((b > 0) and (a < min + b)) or - ((b < 0) and (a > max + b))) - overflow.* = 1; - return a -% b; -} - -test "subodi3" { - const min: i64 = math.minInt(i64); - const max: i64 = math.maxInt(i64); - var i: i64 = 1; - while (i < max) : (i *|= 2) { - try test__subodi4(i, i); - try test__subodi4(-i, -i); - try test__subodi4(i, -i); - try test__subodi4(-i, i); - } - - // edge cases - // 0 - 0 = 0 - // MIN - MIN = 0 - // MAX - MAX = 0 - // 0 - MIN overflow - // 0 - MAX = MIN+1 - // MIN - 0 = MIN - // MAX - 0 = MAX - // MIN - MAX overflow - // MAX - MIN overflow - try test__subodi4(0, 0); - try test__subodi4(min, min); - try test__subodi4(max, max); - try test__subodi4(0, min); - try test__subodi4(0, max); - try test__subodi4(min, 0); - try test__subodi4(max, 0); - try test__subodi4(min, max); - try test__subodi4(max, min); - - // derived edge cases - // MIN+1 - MIN = 1 - // MAX-1 - MAX = -1 - // 1 - MIN overflow - // -1 - MIN = MAX - // -1 - MAX = MIN - // +1 - MAX = MIN+2 - // MIN - 1 overflow - // MIN - -1 = MIN+1 - // MAX - 1 = MAX-1 - // MAX - -1 overflow - try test__subodi4(min + 1, min); - try test__subodi4(max - 1, max); - try test__subodi4(1, min); - try test__subodi4(-1, min); - try test__subodi4(-1, max); - try test__subodi4(1, max); - try test__subodi4(min, 1); - try test__subodi4(min, -1); - try test__subodi4(max, -1); - try test__subodi4(max, 1); -} diff --git a/lib/compiler_rt/subosi4_test.zig b/lib/compiler_rt/subosi4_test.zig deleted file mode 100644 index 8644e8100ef6..000000000000 --- a/lib/compiler_rt/subosi4_test.zig +++ /dev/null @@ -1,82 +0,0 @@ -const subo = @import("subo.zig"); -const testing = @import("std").testing; - -fn test__subosi4(a: i32, b: i32) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = subo.__subosi4(a, b, &result_ov); - const expected: i32 = simple_subosi4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -// 2 cases on evaluating `a-b`: -// 1. 
`a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b -// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b -// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware -pub fn simple_subosi4(a: i32, b: i32, overflow: *c_int) i32 { - overflow.* = 0; - const min: i32 = -2147483648; - const max: i32 = 2147483647; - if (((b > 0) and (a < min + b)) or - ((b < 0) and (a > max + b))) - overflow.* = 1; - return a -% b; -} - -test "subosi3" { - // -2^31 <= i32 <= 2^31-1 - // 2^31 = 2147483648 - // 2^31-1 = 2147483647 - const min: i32 = -2147483648; - const max: i32 = 2147483647; - var i: i32 = 1; - while (i < max) : (i *|= 2) { - try test__subosi4(i, i); - try test__subosi4(-i, -i); - try test__subosi4(i, -i); - try test__subosi4(-i, i); - } - - // edge cases - // 0 - 0 = 0 - // MIN - MIN = 0 - // MAX - MAX = 0 - // 0 - MIN overflow - // 0 - MAX = MIN+1 - // MIN - 0 = MIN - // MAX - 0 = MAX - // MIN - MAX overflow - // MAX - MIN overflow - try test__subosi4(0, 0); - try test__subosi4(min, min); - try test__subosi4(max, max); - try test__subosi4(0, min); - try test__subosi4(0, max); - try test__subosi4(min, 0); - try test__subosi4(max, 0); - try test__subosi4(min, max); - try test__subosi4(max, min); - - // derived edge cases - // MIN+1 - MIN = 1 - // MAX-1 - MAX = -1 - // 1 - MIN overflow - // -1 - MIN = MAX - // -1 - MAX = MIN - // +1 - MAX = MIN+2 - // MIN - 1 overflow - // MIN - -1 = MIN+1 - // MAX - 1 = MAX-1 - // MAX - -1 overflow - try test__subosi4(min + 1, min); - try test__subosi4(max - 1, max); - try test__subosi4(1, min); - try test__subosi4(-1, min); - try test__subosi4(-1, max); - try test__subosi4(1, max); - try test__subosi4(min, 1); - try test__subosi4(min, -1); - try test__subosi4(max, -1); - try test__subosi4(max, 1); -} diff --git a/lib/compiler_rt/suboti4_test.zig b/lib/compiler_rt/suboti4_test.zig deleted file mode 100644 index 65018bc966e5..000000000000 --- a/lib/compiler_rt/suboti4_test.zig +++ /dev/null @@ -1,84 +0,0 @@ -const subo = @import("subo.zig"); -const builtin = @import("builtin"); -const std = @import("std"); -const testing = std.testing; -const math = std.math; - -fn test__suboti4(a: i128, b: i128) !void { - var result_ov: c_int = undefined; - var expected_ov: c_int = undefined; - const result = subo.__suboti4(a, b, &result_ov); - const expected: i128 = simple_suboti4(a, b, &expected_ov); - try testing.expectEqual(expected, result); - try testing.expectEqual(expected_ov, result_ov); -} - -// 2 cases on evaluating `a-b`: -// 1. 
`a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b -// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b -// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware -pub fn simple_suboti4(a: i128, b: i128, overflow: *c_int) i128 { - overflow.* = 0; - const min: i128 = math.minInt(i128); - const max: i128 = math.maxInt(i128); - if (((b > 0) and (a < min + b)) or - ((b < 0) and (a > max + b))) - overflow.* = 1; - return a -% b; -} - -test "suboti3" { - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - - const min: i128 = math.minInt(i128); - const max: i128 = math.maxInt(i128); - var i: i128 = 1; - while (i < max) : (i *|= 2) { - try test__suboti4(i, i); - try test__suboti4(-i, -i); - try test__suboti4(i, -i); - try test__suboti4(-i, i); - } - - // edge cases - // 0 - 0 = 0 - // MIN - MIN = 0 - // MAX - MAX = 0 - // 0 - MIN overflow - // 0 - MAX = MIN+1 - // MIN - 0 = MIN - // MAX - 0 = MAX - // MIN - MAX overflow - // MAX - MIN overflow - try test__suboti4(0, 0); - try test__suboti4(min, min); - try test__suboti4(max, max); - try test__suboti4(0, min); - try test__suboti4(0, max); - try test__suboti4(min, 0); - try test__suboti4(max, 0); - try test__suboti4(min, max); - try test__suboti4(max, min); - - // derived edge cases - // MIN+1 - MIN = 1 - // MAX-1 - MAX = -1 - // 1 - MIN overflow - // -1 - MIN = MAX - // -1 - MAX = MIN - // +1 - MAX = MIN+2 - // MIN - 1 overflow - // MIN - -1 = MIN+1 - // MAX - 1 = MAX-1 - // MAX - -1 overflow - try test__suboti4(min + 1, min); - try test__suboti4(max - 1, max); - try test__suboti4(1, min); - try test__suboti4(-1, min); - try test__suboti4(-1, max); - try test__suboti4(1, max); - try test__suboti4(min, 1); - try test__suboti4(min, -1); - try test__suboti4(max, -1); - try test__suboti4(max, 1); -} diff --git a/lib/compiler_rt/subvdi3.zig b/lib/compiler_rt/subvdi3.zig index 8248e930222d..a34deb2da1bf 100644 --- a/lib/compiler_rt/subvdi3.zig +++ b/lib/compiler_rt/subvdi3.zig @@ -1,4 +1,3 @@ -const subv = @import("subo.zig"); const common = @import("./common.zig"); const testing = @import("std").testing; @@ -9,9 +8,10 @@ comptime { } pub fn __subvdi3(a: i64, b: i64) callconv(.c) i64 { - var overflow: c_int = 0; - const sum = subv.__subodi4(a, b, &overflow); - if (overflow != 0) @panic("compiler-rt: integer overflow"); + const sum = a -% b; + // Overflow occurred iff the operands have opposite signs, and the sign of the + // sum is the opposite of the lhs sign. + if (((a ^ b) & (sum ^ a)) < 0) @panic("compiler-rt: integer overflow"); return sum; } diff --git a/lib/compiler_rt/subvsi3.zig b/lib/compiler_rt/subvsi3.zig index 8a2ea6c6a612..c524a3a63499 100644 --- a/lib/compiler_rt/subvsi3.zig +++ b/lib/compiler_rt/subvsi3.zig @@ -1,4 +1,3 @@ -const subv = @import("subo.zig"); const common = @import("./common.zig"); const testing = @import("std").testing; @@ -9,9 +8,10 @@ comptime { } pub fn __subvsi3(a: i32, b: i32) callconv(.c) i32 { - var overflow: c_int = 0; - const sum = subv.__subosi4(a, b, &overflow); - if (overflow != 0) @panic("compiler-rt: integer overflow"); + const sum = a -% b; + // Overflow occurred iff the operands have opposite signs, and the sign of the + // sum is the opposite of the lhs sign. 
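+    // For example (an illustrative case): a == minInt(i32) and b == 1 wraps to sum == maxInt(i32); +    // then a ^ b < 0 (the signs differ) and sum ^ a < 0 (the sum flipped sign), so the check below fires.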
+ if (((a ^ b) & (sum ^ a)) < 0) @panic("compiler-rt: integer overflow"); return sum; } diff --git a/lib/zig.h b/lib/zig.h index 7fc86df5c41a..7a7a22b17ba1 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -809,15 +809,13 @@ static inline bool zig_addo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8 #endif } -zig_extern int32_t __addosi4(int32_t lhs, int32_t rhs, int *overflow); static inline bool zig_addo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gcc) int32_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int32_t full_res = __addosi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int32_t full_res = (int32_t)((uint32_t)lhs + (uint32_t)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i32(full_res, bits); return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); @@ -835,15 +833,13 @@ static inline bool zig_addo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8 #endif } -zig_extern int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow); static inline bool zig_addo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) || defined(zig_gcc) int64_t full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int64_t full_res = __addodi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int64_t full_res = (int64_t)((uint64_t)lhs + (uint64_t)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i64(full_res, bits); return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); @@ -917,15 +913,13 @@ static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8 #endif } -zig_extern int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow); static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gcc) int32_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int32_t full_res = __subosi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int32_t full_res = (int32_t)((uint32_t)lhs - (uint32_t)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i32(full_res, bits); return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits); @@ -943,15 +937,13 @@ static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8 #endif } -zig_extern int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow); static inline bool zig_subo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) || defined(zig_gcc) int64_t full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - int64_t full_res = __subodi4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + int64_t full_res = (int64_t)((uint64_t)lhs - (uint64_t)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i64(full_res, bits); return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits); @@ -1755,15 +1747,13 @@ static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint #endif } -zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, int *overflow); static inline bool zig_addo_i128(zig_i128 
*res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(add_overflow) zig_i128 full_res; bool overflow = __builtin_add_overflow(lhs, rhs, &full_res); #else - int overflow_int; - zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + zig_i128 full_res = (zig_i128)((zig_u128)lhs + (zig_u128)rhs); + bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0; #endif *res = zig_wrap_i128(full_res, bits); return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits); @@ -1781,15 +1771,13 @@ static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint #endif } -zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, int *overflow); static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) { #if zig_has_builtin(sub_overflow) zig_i128 full_res; bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res); #else - int overflow_int; - zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int); - bool overflow = overflow_int != 0; + zig_i128 full_res = (zig_i128)((zig_u128)lhs - (zig_u128)rhs); + bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0; #endif *res = zig_wrap_i128(full_res, bits); return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits); diff --git a/src/Air.zig b/src/Air.zig index 1b394ca1c161..722ea28305ac 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -660,8 +660,8 @@ pub const Inst = struct { /// Given a pointer to a slice, return a pointer to the pointer of the slice. /// Uses the `ty_op` field. ptr_slice_ptr_ptr, - /// Given an (array value or vector value) and element index, - /// return the element value at that index. + /// Given an (array value or vector value) and element index, return the element value at + /// that index. If the lhs is a vector value, the index is guaranteed to be comptime-known. /// Result type is the element type of the array operand. /// Uses the `bin_op` field. array_elem_val, @@ -874,10 +874,6 @@ pub const Inst = struct { /// Uses the `ty_pl` field. save_err_return_trace_index, - /// Store an element to a vector pointer at an index. - /// Uses the `vector_store_elem` field. - vector_store_elem, - /// Compute a pointer to a `Nav` at runtime, always one of: /// /// * `threadlocal var` @@ -919,6 +915,26 @@ pub const Inst = struct { /// Operand is unused and set to Ref.none work_group_id, + // The remaining instructions are not emitted by Sema. They are only emitted by `Legalize`, + // depending on the enabled features. As such, backends can consider them `unreachable` if + // they do not enable the relevant legalizations. + + /// Given a pointer to a vector, a runtime-known index, and a scalar value, store the value + /// into the vector at the given index. Zig does not support this operation, but `Legalize` + /// may emit it when scalarizing vector operations. + /// + /// Uses the `pl_op` field with payload `Bin`. `operand` is the vector pointer. `lhs` is the + /// element index of type `usize`. `rhs` is the element value. Result is always void. + legalize_vec_store_elem, + /// Given a vector value and a runtime-known index, return the element value at that index. + /// This instruction is similar to `array_elem_val`; the only difference is that the index + /// here is runtime-known, which is usually not allowed for vectors. `Legalize` may emit + /// this instruction when scalarizing vector operations. + /// + /// Uses the `bin_op` field. `lhs` is the vector value. 
`rhs` is the element index. Result + /// type is the vector element type. + legalize_vec_elem_val, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1220,11 +1236,6 @@ pub const Inst = struct { operand: Ref, operation: std.builtin.ReduceOp, }, - vector_store_elem: struct { - vector_ptr: Ref, - // Index into a different array. - payload: u32, - }, ty_nav: struct { ty: InternPool.Index, nav: InternPool.Nav.Index, @@ -1689,8 +1700,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .set_union_tag, .prefetch, .set_err_return_trace, - .vector_store_elem, .c_va_end, + .legalize_vec_store_elem, => return .void, .slice_len, @@ -1709,7 +1720,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) return .fromInterned(ip.funcTypeReturnType(callee_ty.toIntern())); }, - .slice_elem_val, .ptr_elem_val, .array_elem_val => { + .slice_elem_val, .ptr_elem_val, .array_elem_val, .legalize_vec_elem_val => { const ptr_ty = air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip); return ptr_ty.childTypeIp(ip); }, @@ -1857,7 +1868,6 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .prefetch, .wasm_memory_grow, .set_err_return_trace, - .vector_store_elem, .c_va_arg, .c_va_copy, .c_va_end, @@ -1868,6 +1878,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .intcast_safe, .int_from_float_safe, .int_from_float_optimized_safe, + .legalize_vec_store_elem, => true, .add, @@ -2013,6 +2024,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .work_item_id, .work_group_size, .work_group_id, + .legalize_vec_elem_val, => false, .is_non_null_ptr, .is_null_ptr, .is_non_err_ptr, .is_err_ptr => air.typeOf(data.un_op, ip).isVolatilePtrIp(ip), diff --git a/src/Air/Legalize.zig b/src/Air/Legalize.zig index 46c96b4472c3..1d935bd360df 100644 --- a/src/Air/Legalize.zig +++ b/src/Air/Legalize.zig @@ -14,7 +14,7 @@ features: if (switch (dev.env) { return comptime bootstrap_features.contains(feature); } /// `inline` to propagate comptime-known result. - fn hasAny(_: @This(), comptime features: []const Feature) bool { + inline fn hasAny(_: @This(), comptime features: []const Feature) bool { return comptime !bootstrap_features.intersectWith(.initMany(features)).eql(.initEmpty()); } } else struct { @@ -154,9 +154,9 @@ pub const Feature = enum { /// Currently assumes little endian and a specific integer layout where the lsb of every integer is the lsb of the /// first byte of memory until bit pointers know their backing type. expand_packed_store, - /// Replace `struct_field_val` of a packed field with a `store` and packed `load`. + /// Replace `struct_field_val` of a packed field with a `bitcast` to integer, `shr`, `trunc`, and `bitcast` to field type. expand_packed_struct_field_val, - /// Replace `aggregate_init` of a packed aggregate with a series a packed `store`s followed by a `load`. + /// Replace `aggregate_init` of a packed struct with a sequence of `shl_exact`, `bitcast`, `intcast`, and `bit_or`. 
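+    /// For example (a sketch, field names hypothetical): initializing `packed struct(u8) { lo: u4, hi: u4 }` lowers to +    /// roughly `bitcast(bit_or(intcast(bitcast(lo)), shl_exact(intcast(bitcast(hi)), 4)))` on the `u8` backing integer.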
expand_packed_aggregate_init, fn scalarize(tag: Air.Inst.Tag) Feature { @@ -320,28 +320,36 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .xor, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .add_safe => if (l.features.has(.expand_add_safe)) { assert(!l.features.has(.scalarize_add_safe)); // it doesn't make sense to do both continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .add_with_overflow)); } else if (l.features.has(.scalarize_add_safe)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .sub_safe => if (l.features.has(.expand_sub_safe)) { assert(!l.features.has(.scalarize_sub_safe)); // it doesn't make sense to do both continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .sub_with_overflow)); } else if (l.features.has(.scalarize_sub_safe)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .mul_safe => if (l.features.has(.expand_mul_safe)) { assert(!l.features.has(.scalarize_mul_safe)); // it doesn't make sense to do both continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .mul_with_overflow)); } else if (l.features.has(.scalarize_mul_safe)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op); + if (l.typeOf(bin_op.lhs).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } }, .ptr_add, .ptr_sub => {}, inline .add_with_overflow, @@ -350,7 +358,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .shl_with_overflow, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; - if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst)); + if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst)); + } }, .alloc => {}, .inferred_alloc, .inferred_alloc_comptime => unreachable, @@ -387,7 +397,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { } } } - if (l.features.has(comptime .scalarize(air_tag))) continue :inst try l.scalarize(inst, .bin_op); + if (l.features.has(comptime .scalarize(air_tag))) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op)); + } } }, inline .not, @@ -406,64 +418,41 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .float_from_int, => |air_tag| if (l.features.has(comptime 
.scalarize(air_tag))) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .bitcast => if (l.features.has(.scalarize_bitcast)) { - const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const to_ty = ty_op.ty.toType(); - const to_ty_tag = to_ty.zigTypeTag(zcu); - const to_ty_legal = legal: switch (to_ty_tag) { - else => true, - .array, .vector => { - if (to_ty.arrayLen(zcu) == 1) break :legal true; - const to_elem_ty = to_ty.childType(zcu); - break :legal to_elem_ty.bitSize(zcu) == 8 * to_elem_ty.abiSize(zcu); - }, - }; - - const from_ty = l.typeOf(ty_op.operand); - const from_ty_legal = legal: switch (from_ty.zigTypeTag(zcu)) { - else => true, - .array, .vector => { - if (from_ty.arrayLen(zcu) == 1) break :legal true; - const from_elem_ty = from_ty.childType(zcu); - break :legal from_elem_ty.bitSize(zcu) == 8 * from_elem_ty.abiSize(zcu); - }, - }; - - if (!to_ty_legal and !from_ty_legal and to_ty.arrayLen(zcu) == from_ty.arrayLen(zcu)) switch (to_ty_tag) { - else => unreachable, - .array => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastToArrayBlockPayload(inst)), - .vector => continue :inst try l.scalarize(inst, .bitcast), - }; - if (!to_ty_legal) switch (to_ty_tag) { - else => unreachable, - .array => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastResultArrayBlockPayload(inst)), - .vector => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastResultVectorBlockPayload(inst)), - }; - if (!from_ty_legal) continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastOperandBlockPayload(inst)); + if (try l.scalarizeBitcastBlockPayload(inst)) |payload| { + continue :inst l.replaceInst(inst, .block, payload); + } }, .intcast_safe => if (l.features.has(.expand_intcast_safe)) { assert(!l.features.has(.scalarize_intcast_safe)); // it doesn't make sense to do both continue :inst l.replaceInst(inst, .block, try l.safeIntcastBlockPayload(inst)); } else if (l.features.has(.scalarize_intcast_safe)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .int_from_float_safe => if (l.features.has(.expand_int_from_float_safe)) { assert(!l.features.has(.scalarize_int_from_float_safe)); continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, false)); } else if (l.features.has(.scalarize_int_from_float_safe)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .int_from_float_optimized_safe => if (l.features.has(.expand_int_from_float_optimized_safe)) { assert(!l.features.has(.scalarize_int_from_float_optimized_safe)); continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, true)); } else if (l.features.has(.scalarize_int_from_float_optimized_safe)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; - if (ty_op.ty.toType().isVector(zcu)) 
continue :inst try l.scalarize(inst, .ty_op); + if (ty_op.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op)); + } }, .block, .loop => { const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -498,7 +487,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .neg_optimized, => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const un_op = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op; - if (l.typeOf(un_op).isVector(zcu)) continue :inst try l.scalarize(inst, .un_op); + if (l.typeOf(un_op).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op)); + } }, .cmp_lt, .cmp_lt_optimized, @@ -515,7 +506,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { => {}, inline .cmp_vector, .cmp_vector_optimized => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) { const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; - if (ty_pl.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .cmp_vector); + if (ty_pl.ty.toType().isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .cmp_vector)); + } }, .cond_br => { const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; @@ -570,13 +563,17 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .load => if (l.features.has(.expand_packed_load)) { const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; const ptr_info = l.typeOf(ty_op.operand).ptrInfo(zcu); - if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) continue :inst l.replaceInst(inst, .block, try l.packedLoadBlockPayload(inst)); + if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { + continue :inst l.replaceInst(inst, .block, try l.packedLoadBlockPayload(inst)); + } }, .ret, .ret_safe, .ret_load => {}, .store, .store_safe => if (l.features.has(.expand_packed_store)) { const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; const ptr_info = l.typeOf(bin_op.lhs).ptrInfo(zcu); - if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) continue :inst l.replaceInst(inst, .block, try l.packedStoreBlockPayload(inst)); + if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { + continue :inst l.replaceInst(inst, .block, try l.packedStoreBlockPayload(inst)); + } }, .unreach, .optional_payload, @@ -624,7 +621,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { switch (vector_ty.vectorLen(zcu)) { 0 => unreachable, 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{ - .ty = Air.internedToRef(vector_ty.childType(zcu).toIntern()), + .ty = .fromType(vector_ty.childType(zcu)), .operand = reduce.operand, } }), else => {}, @@ -641,9 +638,15 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { else => {}, } }, - .shuffle_one => if (l.features.has(.scalarize_shuffle_one)) continue :inst try l.scalarize(inst, .shuffle_one), - .shuffle_two => if (l.features.has(.scalarize_shuffle_two)) continue :inst try l.scalarize(inst, .shuffle_two), - .select => if (l.features.has(.scalarize_select)) continue :inst try l.scalarize(inst, .select), + .shuffle_one => if (l.features.has(.scalarize_shuffle_one)) { + continue :inst l.replaceInst(inst, .block, try 
l.scalarizeShuffleOneBlockPayload(inst)); + }, + .shuffle_two => if (l.features.has(.scalarize_shuffle_two)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleTwoBlockPayload(inst)); + }, + .select => if (l.features.has(.scalarize_select)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .select)); + }, .memset, .memset_safe, .memcpy, @@ -666,16 +669,27 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { const agg_ty = ty_pl.ty.toType(); switch (agg_ty.zigTypeTag(zcu)) { else => {}, - .@"struct", .@"union" => switch (agg_ty.containerLayout(zcu)) { + .@"union" => unreachable, + .@"struct" => switch (agg_ty.containerLayout(zcu)) { .auto, .@"extern" => {}, - .@"packed" => continue :inst l.replaceInst(inst, .block, try l.packedAggregateInitBlockPayload(inst)), + .@"packed" => switch (agg_ty.structFieldCount(zcu)) { + 0 => unreachable, + // An `aggregate_init` of a packed struct with 1 field is just a fancy bitcast. + 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{ + .ty = .fromType(agg_ty), + .operand = @enumFromInt(l.air_extra.items[ty_pl.payload]), + } }), + else => continue :inst l.replaceInst(inst, .block, try l.packedAggregateInitBlockPayload(inst)), + }, }, } }, .union_init, .prefetch => {}, .mul_add => if (l.features.has(.scalarize_mul_add)) { const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; - if (l.typeOf(pl_op.operand).isVector(zcu)) continue :inst try l.scalarize(inst, .pl_op_bin); + if (l.typeOf(pl_op.operand).isVector(zcu)) { + continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .pl_op_bin)); + } }, .field_parent_ptr, .wasm_memory_size, @@ -685,7 +699,6 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .set_err_return_trace, .addrspace_cast, .save_err_return_trace_index, - .vector_store_elem, .runtime_nav_ptr, .c_va_arg, .c_va_copy, @@ -694,1003 +707,757 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { .work_item_id, .work_group_size, .work_group_id, + .legalize_vec_elem_val, + .legalize_vec_store_elem, => {}, } } } -const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, bitcast, cmp_vector, shuffle_one, shuffle_two, select }; -/// inline to propagate comptime-known `replaceInst` result. 
-inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Tag { - return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, form)); -} -fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Data { +const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, select }; +fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, form: ScalarizeForm) Error!Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; const orig = l.air_instructions.get(@intFromEnum(orig_inst)); const res_ty = l.typeOfIndex(orig_inst); - const res_len = res_ty.vectorLen(zcu); - - const extra_insts = switch (form) { - .un_op, .ty_op, .bitcast => 1, - .bin_op, .cmp_vector => 2, - .pl_op_bin => 3, - .shuffle_one, .shuffle_two => 13, - .select => 6, + const result_is_array = switch (res_ty.zigTypeTag(zcu)) { + .vector => false, + .array => true, + else => unreachable, }; - var inst_buf: [5 + extra_insts + 9]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + const res_len = res_ty.arrayLen(zcu); + const res_elem_ty = res_ty.childType(zcu); - var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + if (result_is_array) { + // This is only allowed when legalizing an elementwise bitcast. + assert(orig.tag == .bitcast); + assert(form == .ty_op); + } - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = res_alloc_inst.toRef(), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = cur_index_inst.toRef(), - .rhs = res_elem: switch (form) { - .un_op => loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .un_op = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.un_op, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef() }, - }).toRef(), - .ty_op => loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.childType(zcu).toIntern()), - .operand = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.ty_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - .bin_op => loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.bin_op.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.bin_op.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - .pl_op_bin => { - const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data; - break :res_elem loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .pl_op = .{ - .payload = try l.addExtra(Air.Bin, .{ - .lhs = loop.block.add(l, .{ - .tag = 
.array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - }), - .operand = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.pl_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(); - }, - .bitcast => loop.block.addBitCast(l, res_ty.childType(zcu), loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.ty_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef()), - .cmp_vector => { - const extra = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data; - break :res_elem (try loop.block.addCmp( - l, - extra.compareOperator(), - loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .{ .optimized = switch (orig.tag) { - else => unreachable, - .cmp_vector => false, - .cmp_vector_optimized => true, - } }, - )).toRef(); - }, - .shuffle_one, .shuffle_two => { - const ip = &zcu.intern_pool; - const unwrapped = switch (form) { - else => comptime unreachable, - .shuffle_one => l.getTmpAir().unwrapShuffleOne(zcu, orig_inst), - .shuffle_two => l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst), - }; - const operand_a = switch (form) { - else => comptime unreachable, - .shuffle_one => unwrapped.operand, - .shuffle_two => unwrapped.operand_a, - }; - const operand_a_len = l.typeOf(operand_a).vectorLen(zcu); - const elem_ty = res_ty.childType(zcu); - var res_elem: Result = .init(l, elem_ty, &loop.block); - res_elem.block = .init(loop.block.stealCapacity(extra_insts)); - { - const ExpectedContents = extern struct { - mask_elems: [128]InternPool.Index, - ct_elems: switch (form) { - else => unreachable, - .shuffle_one => extern struct { - keys: [152]InternPool.Index, - header: u8 align(@alignOf(u32)), - index: [256][2]u8, - }, - .shuffle_two => void, - }, - }; - var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = - std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa); - const gpa = stack.get(); + // Our output will be a loop doing elementwise stores: + // + // %1 = block(@Vector(N, Scalar), { + //   %2 = alloc(*usize) + //   %3 = alloc(*@Vector(N, Scalar)) + //   %4 = store(%2, @zero_usize) + //   %5 = loop({ + //     %6 = load(%2) + //     %7 = <result element computed from index %6> + //     %8 = legalize_vec_store_elem(%3, %6, %7) + //     %9 = cmp_eq(%6, <N - 1>) + //     %10 = cond_br(%9, { + //       %11 = load(%3) + //       %12 = br(%1, %11) + //     }, { + //       %13 = add(%6, @one_usize) + //       %14 = store(%2, %13) + //       %15 = repeat(%5) + //     }) + //   }) + // }) + // + // If scalarizing an elementwise bitcast, the result might be an array, in which case + // `legalize_vec_store_elem` becomes two instructions (`ptr_elem_ptr` and `store`). + // Therefore, there are 13 or 14 instructions in the block, plus however many are + // needed to compute each result element for `form`. 
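+    // For example, the `.bin_op` form computes %7 above with three instructions (sketch): +    //   %a = legalize_vec_elem_val(lhs, %6) +    //   %b = legalize_vec_elem_val(rhs, %6) +    //   %7 = <original bin_op tag>(%a, %b)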
+ const inst_per_form: usize = switch (form) { + .un_op, .ty_op => 2, + .bin_op, .cmp_vector => 3, + .pl_op_bin => 4, + .select => 7, + }; + const max_inst_per_form = 7; // maximum value in the above switch + var inst_buf: [14 + max_inst_per_form]Air.Inst.Index = undefined; - const mask_elems = try gpa.alloc(InternPool.Index, res_len); - defer gpa.free(mask_elems); - - var ct_elems: switch (form) { - else => unreachable, - .shuffle_one => std.AutoArrayHashMapUnmanaged(InternPool.Index, void), - .shuffle_two => struct { - const empty: @This() = .{}; - inline fn deinit(_: @This(), _: std.mem.Allocator) void {} - inline fn ensureTotalCapacity(_: @This(), _: std.mem.Allocator, _: usize) error{}!void {} - }, - } = .empty; - defer ct_elems.deinit(gpa); - try ct_elems.ensureTotalCapacity(gpa, res_len); - - const mask_elem_ty = try pt.intType(.signed, 1 + Type.smallestUnsignedBits(@max(operand_a_len, switch (form) { - else => comptime unreachable, - .shuffle_one => res_len, - .shuffle_two => l.typeOf(unwrapped.operand_b).vectorLen(zcu), - }))); - for (mask_elems, unwrapped.mask) |*mask_elem_val, mask_elem| mask_elem_val.* = (try pt.intValue(mask_elem_ty, switch (form) { - else => comptime unreachable, - .shuffle_one => switch (mask_elem.unwrap()) { - .elem => |index| index, - .value => |elem_val| if (ip.isUndef(elem_val)) - operand_a_len - else - ~@as(i33, @intCast((ct_elems.getOrPutAssumeCapacity(elem_val)).index)), - }, - .shuffle_two => switch (mask_elem.unwrap()) { - .a_elem => |a_index| a_index, - .b_elem => |b_index| ~@as(i33, b_index), - .undef => operand_a_len, - }, - })).toIntern(); - const mask_ty = try pt.arrayType(.{ - .len = res_len, - .child = mask_elem_ty.toIntern(), - }); - const mask_elem_inst = res_elem.block.add(l, .{ - .tag = .ptr_elem_val, - .data = .{ .bin_op = .{ - .lhs = Air.internedToRef(try pt.intern(.{ .ptr = .{ - .ty = (try pt.manyConstPtrType(mask_elem_ty)).toIntern(), - .base_addr = .{ .uav = .{ - .val = (try pt.aggregateValue(mask_ty, mask_elems)).toIntern(), - .orig_ty = (try pt.singleConstPtrType(mask_ty)).toIntern(), - } }, - .byte_offset = 0, - } })), - .rhs = cur_index_inst.toRef(), - } }, - }); - var def_cond_br: CondBr = .init(l, (try res_elem.block.addCmp( - l, - .lt, - mask_elem_inst.toRef(), - try pt.intRef(mask_elem_ty, operand_a_len), - .{}, - )).toRef(), &res_elem.block, .{}); - def_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity()); - { - const operand_b_used = switch (form) { - else => comptime unreachable, - .shuffle_one => ct_elems.count() > 0, - .shuffle_two => true, - }; - var operand_cond_br: CondBr = undefined; - operand_cond_br.then_block = if (operand_b_used) then_block: { - operand_cond_br = .init(l, (try def_cond_br.then_block.addCmp( - l, - .gte, - mask_elem_inst.toRef(), - try pt.intRef(mask_elem_ty, 0), - .{}, - )).toRef(), &def_cond_br.then_block, .{}); - break :then_block .init(def_cond_br.then_block.stealRemainingCapacity()); - } else def_cond_br.then_block; - _ = operand_cond_br.then_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = operand_cond_br.then_block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = operand_a, - .rhs = operand_cond_br.then_block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = mask_elem_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - } }, - }); - if (operand_b_used) { - operand_cond_br.else_block = .init(operand_cond_br.then_block.stealRemainingCapacity()); - _ = 
operand_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = if (switch (form) { - else => comptime unreachable, - .shuffle_one => ct_elems.count() > 1, - .shuffle_two => true, - }) operand_cond_br.else_block.add(l, .{ - .tag = switch (form) { - else => comptime unreachable, - .shuffle_one => .ptr_elem_val, - .shuffle_two => .array_elem_val, - }, - .data = .{ .bin_op = .{ - .lhs = operand_b: switch (form) { - else => comptime unreachable, - .shuffle_one => { - const ct_elems_ty = try pt.arrayType(.{ - .len = ct_elems.count(), - .child = elem_ty.toIntern(), - }); - break :operand_b Air.internedToRef(try pt.intern(.{ .ptr = .{ - .ty = (try pt.manyConstPtrType(elem_ty)).toIntern(), - .base_addr = .{ .uav = .{ - .val = (try pt.aggregateValue(ct_elems_ty, ct_elems.keys())).toIntern(), - .orig_ty = (try pt.singleConstPtrType(ct_elems_ty)).toIntern(), - } }, - .byte_offset = 0, - } })); - }, - .shuffle_two => unwrapped.operand_b, - }, - .rhs = operand_cond_br.else_block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = operand_cond_br.else_block.add(l, .{ - .tag = .not, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(mask_elem_ty.toIntern()), - .operand = mask_elem_inst.toRef(), - } }, - }).toRef(), - } }, - }).toRef(), - } }, - }).toRef() else res_elem_br: { - _ = operand_cond_br.else_block.stealCapacity(3); - break :res_elem_br Air.internedToRef(ct_elems.keys()[0]); - }, - } }, - }); - def_cond_br.else_block = .init(operand_cond_br.else_block.stealRemainingCapacity()); - try operand_cond_br.finish(l); - } else { - def_cond_br.then_block = operand_cond_br.then_block; - _ = def_cond_br.then_block.stealCapacity(6); - def_cond_br.else_block = .init(def_cond_br.then_block.stealRemainingCapacity()); - } - } - _ = def_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = try pt.undefRef(elem_ty), - } }, - }); - try def_cond_br.finish(l); - } - try res_elem.finish(l); - break :res_elem res_elem.inst.toRef(); - }, - .select => { - const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data; - var res_elem: Result = .init(l, l.typeOf(extra.lhs).childType(zcu), &loop.block); - res_elem.block = .init(loop.block.stealCapacity(extra_insts)); - { - var select_cond_br: CondBr = .init(l, res_elem.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig.data.pl_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), &res_elem.block, .{}); - select_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity()); - _ = select_cond_br.then_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = select_cond_br.then_block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }); - select_cond_br.else_block = .init(select_cond_br.then_block.stealRemainingCapacity()); - _ = select_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = res_elem.inst, - .operand = select_cond_br.else_block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try select_cond_br.finish(l); - } - try res_elem.finish(l); - break :res_elem res_elem.inst.toRef(); - }, - }, - }), - } }, - }); + var main_block: Block = .init(&inst_buf); + try l.air_instructions.ensureUnusedCapacity(zcu.gpa, 
inst_buf.len); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( + const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef(); + const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(res_ty)).toRef(); + + _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize); + + var loop: Loop = .init(l, &main_block); + loop.block = .init(main_block.stealRemainingCapacity()); + + const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef(); + const elem_val: Air.Inst.Ref = switch (form) { + .un_op => elem: { + const orig_operand = orig.data.un_op; + const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef(); + break :elem loop.block.addUnOp(l, orig.tag, operand).toRef(); + }, + .ty_op => elem: { + const orig_operand = orig.data.ty_op.operand; + const operand_is_array = switch (l.typeOf(orig_operand).zigTypeTag(zcu)) { + .vector => false, + .array => true, + else => unreachable, + }; + const operand = loop.block.addBinOp( l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_len - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); - } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), + if (operand_is_array) .array_elem_val else .legalize_vec_elem_val, + orig_operand, + index_val, + ).toRef(); + break :elem loop.block.addTyOp(l, orig.tag, res_elem_ty, operand).toRef(); + }, + .bin_op => elem: { + const orig_bin = orig.data.bin_op; + const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef(); + break :elem loop.block.addBinOp(l, orig.tag, lhs, rhs).toRef(); + }, + .pl_op_bin => elem: { + const orig_operand = orig.data.pl_op.operand; + const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data; + const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef(); + const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef(); + break :elem loop.block.add(l, .{ + .tag = orig.tag, + .data = .{ .pl_op = .{ + .operand = operand, + .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), + } }, + }).toRef(); + }, + .cmp_vector => elem: { + const orig_payload = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data; + const cmp_op = orig_payload.compareOperator(); + const optimized = switch (orig.tag) { + .cmp_vector => false, + .cmp_vector_optimized => true, + else => unreachable, + }; + const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, 
.legalize_vec_elem_val, orig_payload.rhs, index_val).toRef();
+            break :elem loop.block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef();
+        },
+        .select => elem: {
+            const orig_cond = orig.data.pl_op.operand;
+            const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
+
+            const elem_block_inst = loop.block.add(l, .{
+                .tag = .block,
+                .data = .{ .ty_pl = .{
+                    .ty = .fromType(res_elem_ty),
+                    .payload = undefined,
                 } },
             });
-            try loop_cond_br.finish(l);
-        }
-        try loop.finish(l);
+            var elem_block: Block = .init(loop.block.stealCapacity(2));
+            const cond = elem_block.addBinOp(l, .legalize_vec_elem_val, orig_cond, index_val).toRef();
+
+            var condbr: CondBr = .init(l, cond, &elem_block, .{});
+
+            condbr.then_block = .init(loop.block.stealCapacity(2));
+            const lhs = condbr.then_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
+            condbr.then_block.addBr(l, elem_block_inst, lhs);
+
+            condbr.else_block = .init(loop.block.stealCapacity(2));
+            const rhs = condbr.else_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
+            condbr.else_block.addBr(l, elem_block_inst, rhs);
+
+            try condbr.finish(l);
+
+            const inst_data = l.air_instructions.items(.data);
+            inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body());
+
+            break :elem elem_block_inst.toRef();
+        },
+    };
+    _ = loop.block.stealCapacity(max_inst_per_form - inst_per_form);
+    if (result_is_array) {
+        const elem_ptr = loop.block.add(l, .{
+            .tag = .ptr_elem_ptr,
+            .data = .{ .ty_pl = .{
+                .ty = .fromType(try pt.singleMutPtrType(res_elem_ty)),
+                .payload = try l.addExtra(Air.Bin, .{
+                    .lhs = result_ptr,
+                    .rhs = index_val,
+                }),
+            } },
+        }).toRef();
+        _ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
+    } else {
+        _ = loop.block.add(l, .{
+            .tag = .legalize_vec_store_elem,
+            .data = .{ .pl_op = .{
+                .operand = result_ptr,
+                .payload = try l.addExtra(Air.Bin, .{
+                    .lhs = index_val,
+                    .rhs = elem_val,
+                }),
+            } },
+        });
+        _ = loop.block.stealCapacity(1);
     }
+    const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, res_len - 1))).toRef();
+
+    var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
+    condbr.then_block = .init(loop.block.stealRemainingCapacity());
+    const result_val = condbr.then_block.addTyOp(l, .load, res_ty, result_ptr).toRef();
+    condbr.then_block.addBr(l, orig_inst, result_val);
+
+    condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
+    const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
+    _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
+    _ = condbr.else_block.add(l, .{
+        .tag = .repeat,
+        .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+    });
+
+    try condbr.finish(l);
+
+    try loop.finish(l);
+
     return .{ .ty_pl = .{
-        .ty = Air.internedToRef(res_ty.toIntern()),
-        .payload = try l.addBlockBody(res_block.body()),
+        .ty = .fromType(res_ty),
+        .payload = try l.addBlockBody(main_block.body()),
     } };
 }
 
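For intuition, the loop that the rewritten scalarization emits corresponds to the following plain Zig (a sketch only; the element type, the length 4, and @sqrt are illustrative stand-ins for the legalized instruction's operands, not names from this change):

    const std = @import("std");

    // What the emitted AIR computes for an elementwise unary op over a
    // @Vector(4, f32): load the index, apply the scalar op, store the
    // element, then either branch out (cmp_eq on the last index) or repeat.
    fn scalarizedSqrt(operand: @Vector(4, f32)) @Vector(4, f32) {
        var result: [4]f32 = undefined;
        var i: usize = 0;
        while (true) {
            result[i] = @sqrt(operand[i]);
            if (i == 4 - 1) break;
            i += 1;
        }
        return result;
    }

    test scalarizedSqrt {
        const got: [4]f32 = scalarizedSqrt(.{ 1.0, 4.0, 9.0, 16.0 });
        try std.testing.expectEqual([4]f32{ 1.0, 2.0, 3.0, 4.0 }, got);
    }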
-fn scalarizeBitcastToArrayBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
+fn scalarizeShuffleOneBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
     const pt = l.pt;
     const zcu = pt.zcu;
+    const gpa = zcu.gpa;
 
-    const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
-    const res_ty = orig_ty_op.ty.toType();
-    const res_elem_ty = res_ty.childType(zcu);
-    const res_len = res_ty.arrayLen(zcu);
+    const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst);
 
-    var inst_buf: [16]Air.Inst.Index = undefined;
-    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
+    // We're going to emit something like this:
+    //
+    //   var x: @Vector(N, T) = all_comptime_known_elems;
+    //   for (out_idxs, in_idxs) |i, j| x[i] = operand[j];
+    //
+    // So we must first compute `out_idxs` and `in_idxs`.
 
-    var res_block: Block = .init(&inst_buf);
-    {
-        const res_alloc_inst = res_block.add(l, .{
-            .tag = .alloc,
-            .data = .{ .ty = try pt.singleMutPtrType(res_ty) },
-        });
-        const index_alloc_inst = res_block.add(l, .{
-            .tag = .alloc,
-            .data = .{ .ty = .ptr_usize },
-        });
-        _ = res_block.add(l, .{
-            .tag = .store,
-            .data = .{ .bin_op = .{
-                .lhs = index_alloc_inst.toRef(),
-                .rhs = .zero_usize,
-            } },
-        });
+    var sfba_state = std.heap.stackFallback(512, gpa);
+    const sfba = sfba_state.get();
 
-        var loop: Loop = .init(l, &res_block);
-        loop.block = .init(res_block.stealRemainingCapacity());
-        {
-            const cur_index_inst = loop.block.add(l, .{
-                .tag = .load,
-                .data = .{ .ty_op = .{
-                    .ty = .usize_type,
-                    .operand = index_alloc_inst.toRef(),
-                } },
-            });
-            _ = loop.block.add(l, .{
-                .tag = .store,
-                .data = .{ .bin_op = .{
-                    .lhs = loop.block.add(l, .{
-                        .tag = .ptr_elem_ptr,
-                        .data = .{ .ty_pl = .{
-                            .ty = Air.internedToRef((try pt.singleMutPtrType(res_elem_ty)).toIntern()),
-                            .payload = try l.addExtra(Air.Bin, .{
-                                .lhs = res_alloc_inst.toRef(),
-                                .rhs = cur_index_inst.toRef(),
-                            }),
-                        } },
-                    }).toRef(),
-                    .rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{
-                        .tag = .array_elem_val,
-                        .data = .{ .bin_op = .{
-                            .lhs = orig_ty_op.operand,
-                            .rhs = cur_index_inst.toRef(),
-                        } },
-                    }).toRef()),
-                } },
-            });
+    const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(out_idxs_buf);
+
+    const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(in_idxs_buf);
+
+    var n: usize = 0;
+    for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
+        .value => {},
+        .elem => |in_idx| {
+            out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
+            in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
+            n += 1;
+        },
+    };
+
+    const init_val: Value = init: {
+        const undef_val = try pt.undefValue(shuffle.result_ty.childType(zcu));
+        const elems = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+        defer sfba.free(elems);
+        for (shuffle.mask, elems) |mask, *elem| elem.* = switch (mask.unwrap()) {
+            .value => |ip_index| ip_index,
+            .elem => undef_val.toIntern(),
+        };
+        break :init try pt.aggregateValue(shuffle.result_ty, elems);
+    };
+
+    // %1 = block(@Vector(N, T), {
+    //   %2 = alloc(*@Vector(N, T))
+    //   %3 = alloc(*usize)
+    //   %4 = store(%2, <@Vector(N, T), init_val>)
+    //   %5 = [addScalarizedShuffle]
+    //   %6 = load(%2)
+    //   %7 = br(%1, %6)
+    // })
+
+    var inst_buf: [6]Air.Inst.Index = undefined;
+    var main_block: Block = .init(&inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(gpa, 19);
+
+    const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
+    const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+
+    _ = main_block.addBinOp(l, .store, result_ptr, .fromValue(init_val));
+
+    try l.addScalarizedShuffle(
+        &main_block,
+        shuffle.operand,
+        result_ptr,
+        index_ptr,
+        out_idxs_buf[0..n],
+        in_idxs_buf[0..n],
+    );
+
+    const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
+    main_block.addBr(l, orig_inst, result_val);
 
-            var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
-                l,
-                .lt,
-                cur_index_inst.toRef(),
-                try pt.intRef(.usize, res_len - 1),
-                .{},
-            )).toRef(), &loop.block, .{});
-            loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
-            {
-                _ = loop_cond_br.then_block.add(l, .{
-                    .tag = .store,
-                    .data = .{ .bin_op = .{
-                        .lhs = index_alloc_inst.toRef(),
-                        .rhs = loop_cond_br.then_block.add(l, .{
-                            .tag = .add,
-                            .data = .{ .bin_op = .{
-                                .lhs = cur_index_inst.toRef(),
-                                .rhs = .one_usize,
-                            } },
-                        }).toRef(),
-                    } },
-                });
-                _ = loop_cond_br.then_block.add(l, .{
-                    .tag = .repeat,
-                    .data = .{ .repeat = .{ .loop_inst = loop.inst } },
-                });
-            }
-            loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
-            _ = loop_cond_br.else_block.add(l, .{
-                .tag = .br,
-                .data = .{ .br = .{
-                    .block_inst = orig_inst,
-                    .operand = loop_cond_br.else_block.add(l, .{
-                        .tag = .load,
-                        .data = .{ .ty_op = .{
-                            .ty = Air.internedToRef(res_ty.toIntern()),
-                            .operand = res_alloc_inst.toRef(),
-                        } },
-                    }).toRef(),
-                } },
-            });
-            try loop_cond_br.finish(l);
-        }
-        try loop.finish(l);
-    }
     return .{ .ty_pl = .{
-        .ty = Air.internedToRef(res_ty.toIntern()),
-        .payload = try l.addBlockBody(res_block.body()),
+        .ty = .fromType(shuffle.result_ty),
+        .payload = try l.addBlockBody(main_block.body()),
     } };
 }
 
-fn scalarizeBitcastOperandBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
+fn scalarizeShuffleTwoBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
     const pt = l.pt;
     const zcu = pt.zcu;
+    const gpa = zcu.gpa;
 
-    const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
-    const res_ty = orig_ty_op.ty.toType();
-    const operand_ty = l.typeOf(orig_ty_op.operand);
-    const int_bits: u16 = @intCast(operand_ty.bitSize(zcu));
-    const int_ty = try pt.intType(.unsigned, int_bits);
-    const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits));
-    const elem_bits: u16 =
@intCast(operand_ty.childType(zcu).bitSize(zcu)); - const elem_int_ty = try pt.intType(.unsigned, elem_bits); - - var inst_buf: [22]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + const shuffle = l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst); - var res_block: Block = .init(&inst_buf); - { - const int_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(int_ty) }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = int_alloc_inst.toRef(), - .rhs = try pt.intRef(int_ty, 0), - } }, - }); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + // We're going to emit something like this: + // + // var x: @Vector(N, T) = undefined; + // for (out_idxs_a, in_idxs_a) |i, j| x[i] = operand_a[j]; + // for (out_idxs_b, in_idxs_b) |i, j| x[i] = operand_b[j]; + // + // The AIR will look like this: + // + // %1 = block(@Vector(N, T), { + // %2 = alloc(*@Vector(N, T)) + // %3 = alloc(*usize) + // %4 = store(%2, <@Vector(N, T), undefined>) + // %5 = [addScalarizedShuffle] + // %6 = [addScalarizedShuffle] + // %7 = load(%2) + // %8 = br(%1, %7) + // }) - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - const cur_int_inst = loop.block.add(l, .{ - .tag = .bit_or, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .shl_exact, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(int_ty.toIntern()), - .operand = loop.block.addBitCast(l, elem_int_ty, loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = orig_ty_op.operand, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef()), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .mul, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(shift_ty.toIntern()), - .operand = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = try pt.intRef(shift_ty, elem_bits), - } }, - }).toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(int_ty.toIntern()), - .operand = int_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); + var sfba_state = std.heap.stackFallback(512, gpa); + const sfba = sfba_state.get(); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, operand_ty.arrayLen(zcu) - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = int_alloc_inst.toRef(), - .rhs = cur_int_inst.toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = 
.{ .loop_inst = loop.inst } },
-            });
-        }
-        loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
-        _ = loop_cond_br.else_block.add(l, .{
-            .tag = .br,
-            .data = .{ .br = .{
-                .block_inst = orig_inst,
-                .operand = loop_cond_br.else_block.addBitCast(l, res_ty, cur_int_inst.toRef()),
-            } },
-        });
-        try loop_cond_br.finish(l);
-    }
-    try loop.finish(l);
+    const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(out_idxs_buf);
+
+    const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
+    defer sfba.free(in_idxs_buf);
+
+    // Iterate `shuffle.mask` before doing anything, because modifying AIR invalidates it.
+    const out_idxs_a, const in_idxs_a, const out_idxs_b, const in_idxs_b = idxs: {
+        var n: usize = 0;
+        for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
+            .undef, .b_elem => {},
+            .a_elem => |in_idx| {
+                out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
+                in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
+                n += 1;
+            },
+        };
+        const a_len = n;
+        for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
+            .undef, .a_elem => {},
+            .b_elem => |in_idx| {
+                out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
+                in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
+                n += 1;
+            },
+        };
+        break :idxs .{
+            out_idxs_buf[0..a_len],
+            in_idxs_buf[0..a_len],
+            out_idxs_buf[a_len..n],
+            in_idxs_buf[a_len..n],
+        };
+    };
+
+    var inst_buf: [7]Air.Inst.Index = undefined;
+    var main_block: Block = .init(&inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(gpa, 33);
+
+    const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
+    const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+
+    _ = main_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.undefValue(shuffle.result_ty)));
+
+    if (out_idxs_a.len == 0) {
+        _ = main_block.stealCapacity(1);
+    } else {
+        try l.addScalarizedShuffle(
+            &main_block,
+            shuffle.operand_a,
+            result_ptr,
+            index_ptr,
+            out_idxs_a,
+            in_idxs_a,
+        );
+    }
+
+    if (out_idxs_b.len == 0) {
+        _ = main_block.stealCapacity(1);
+    } else {
+        try l.addScalarizedShuffle(
+            &main_block,
+            shuffle.operand_b,
+            result_ptr,
+            index_ptr,
+            out_idxs_b,
+            in_idxs_b,
+        );
     }
+
+    const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
+    main_block.addBr(l, orig_inst, result_val);
+
     return .{ .ty_pl = .{
-        .ty = Air.internedToRef(res_ty.toIntern()),
-        .payload = try l.addBlockBody(res_block.body()),
+        .ty = .fromType(shuffle.result_ty),
+        .payload = try l.addBlockBody(main_block.body()),
     } };
 }
 
-fn scalarizeBitcastResultArrayBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
+/// Adds code to `parent_block` which behaves like this loop:
+///
+///   for (out_idxs, in_idxs) |i, j| result_vec_ptr[i] = operand_vec[j];
+///
+/// The actual AIR adds exactly one instruction to `parent_block` itself and 14 instructions
+/// overall, and is as follows:
+///
+///   %1 = block(void, {
+///     %2 = store(index_ptr, @zero_usize)
+///     %3 = loop({
+///       %4 = load(index_ptr)
+///       %5 = ptr_elem_val(in_idxs_ptr, %4)
+///       %6 = ptr_elem_val(out_idxs_ptr, %4)
+///       %7 = legalize_vec_elem_val(operand_vec, %5)
+///       %8 = legalize_vec_store_elem(result_vec_ptr, %6, %7)
+///       %9 = cmp_eq(%4, <usize, out_idxs.len - 1>)
+///       %10 = cond_br(%9, {
+///         %11 = br(%1, @void_value)
+///       }, {
+///         %12 = add(%4, @one_usize)
+///         %13 = store(index_ptr, %12)
+///         %14 = repeat(%3)
+///       })
+///     })
+///   })
+///
+/// The
caller is responsible for reserving space in `l.air_instructions`. +fn addScalarizedShuffle( + l: *Legalize, + parent_block: *Block, + operand_vec: Air.Inst.Ref, + result_vec_ptr: Air.Inst.Ref, + index_ptr: Air.Inst.Ref, + out_idxs: []const InternPool.Index, + in_idxs: []const InternPool.Index, +) Error!void { const pt = l.pt; - const zcu = pt.zcu; - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const int_bits: u16 = @intCast(res_ty.bitSize(zcu)); - const int_ty = try pt.intType(.unsigned, int_bits); - const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits)); - const res_elem_ty = res_ty.childType(zcu); - const elem_bits: u16 = @intCast(res_elem_ty.bitSize(zcu)); - const elem_int_ty = try pt.intType(.unsigned, elem_bits); + assert(out_idxs.len == in_idxs.len); + const n = out_idxs.len; - var inst_buf: [20]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + const idxs_ty = try pt.arrayType(.{ .len = n, .child = .usize_type }); + const idxs_ptr_ty = try pt.singleConstPtrType(idxs_ty); + const manyptr_usize_ty = try pt.manyConstPtrType(.usize); - var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const int_ref = res_block.addBitCast(l, int_ty, orig_ty_op.operand); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, - } }, - }); + const out_idxs_ptr = try pt.intern(.{ .ptr = .{ + .ty = manyptr_usize_ty.toIntern(), + .base_addr = .{ .uav = .{ + .val = (try pt.aggregateValue(idxs_ty, out_idxs)).toIntern(), + .orig_ty = idxs_ptr_ty.toIntern(), + } }, + .byte_offset = 0, + } }); + const in_idxs_ptr = try pt.intern(.{ .ptr = .{ + .ty = manyptr_usize_ty.toIntern(), + .base_addr = .{ .uav = .{ + .val = (try pt.aggregateValue(idxs_ty, in_idxs)).toIntern(), + .orig_ty = idxs_ptr_ty.toIntern(), + } }, + .byte_offset = 0, + } }); - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .ptr_elem_ptr, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef((try pt.singleMutPtrType(res_elem_ty)).toIntern()), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = res_alloc_inst.toRef(), - .rhs = cur_index_inst.toRef(), - }), - } }, - }).toRef(), - .rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{ - .tag = .trunc, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(elem_int_ty.toIntern()), - .operand = loop.block.add(l, .{ - .tag = .shr, - .data = .{ .bin_op = .{ - .lhs = int_ref, - .rhs = loop.block.add(l, .{ - .tag = .mul, - .data = .{ .bin_op = .{ - .lhs = loop.block.add(l, .{ - .tag = .intcast, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(shift_ty.toIntern()), - .operand = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = try pt.intRef(shift_ty, elem_bits), - } }, - }).toRef(), - } }, - }).toRef(), - } }, - }).toRef()), - } }, - }); + const main_block_inst = parent_block.add(l, .{ + .tag = .block, + .data = .{ .ty_pl = .{ + .ty = .void_type, 
+ .payload = undefined, + } }, + }); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_ty.arrayLen(zcu) - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); - } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try loop_cond_br.finish(l); - } - try loop.finish(l); - } - return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), - } }; + var inst_buf: [13]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); + + _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize); + + var loop: Loop = .init(l, &main_block); + loop.block = .init(main_block.stealRemainingCapacity()); + + const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef(); + const in_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(in_idxs_ptr), index_val).toRef(); + const out_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(out_idxs_ptr), index_val).toRef(); + + const elem_val = loop.block.addBinOp(l, .legalize_vec_elem_val, operand_vec, in_idx_val).toRef(); + _ = loop.block.add(l, .{ + .tag = .legalize_vec_store_elem, + .data = .{ .pl_op = .{ + .operand = result_vec_ptr, + .payload = try l.addExtra(Air.Bin, .{ + .lhs = out_idx_val, + .rhs = elem_val, + }), + } }, + }); + + const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, n - 1))).toRef(); + var condbr: CondBr = .init(l, is_end_val, &loop.block, .{}); + condbr.then_block = .init(loop.block.stealRemainingCapacity()); + condbr.then_block.addBr(l, main_block_inst, .void_value); + + condbr.else_block = .init(condbr.then_block.stealRemainingCapacity()); + const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef(); + _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val); + _ = condbr.else_block.add(l, .{ + .tag = .repeat, + .data = .{ .repeat = .{ .loop_inst = loop.inst } }, + }); + + try condbr.finish(l); + try loop.finish(l); + + const inst_data = l.air_instructions.items(.data); + inst_data[@intFromEnum(main_block_inst)].ty_pl.payload = try l.addBlockBody(main_block.body()); } -fn scalarizeBitcastResultVectorBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { +fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const int_bits: u16 = @intCast(res_ty.bitSize(zcu)); - const int_ty = try pt.intType(.unsigned, int_bits); - const shift_ty = try pt.intType(.unsigned, 
std.math.log2_int_ceil(u16, int_bits)); - const res_elem_ty = res_ty.childType(zcu); - const elem_bits: u16 = @intCast(res_elem_ty.bitSize(zcu)); - const elem_int_ty = try pt.intType(.unsigned, elem_bits); + const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - var inst_buf: [19]Air.Inst.Index = undefined; + const dest_ty = ty_op.ty.toType(); + const dest_legal = switch (dest_ty.zigTypeTag(zcu)) { + else => true, + .array, .vector => legal: { + if (dest_ty.arrayLen(zcu) == 1) break :legal true; + const dest_elem_ty = dest_ty.childType(zcu); + break :legal dest_elem_ty.bitSize(zcu) == 8 * dest_elem_ty.abiSize(zcu); + }, + }; + + const operand_ty = l.typeOf(ty_op.operand); + const operand_legal = switch (operand_ty.zigTypeTag(zcu)) { + else => true, + .array, .vector => legal: { + if (operand_ty.arrayLen(zcu) == 1) break :legal true; + const operand_elem_ty = operand_ty.childType(zcu); + break :legal operand_elem_ty.bitSize(zcu) == 8 * operand_elem_ty.abiSize(zcu); + }, + }; + + if (dest_legal and operand_legal) return null; + + if (!operand_legal and !dest_legal and operand_ty.arrayLen(zcu) == dest_ty.arrayLen(zcu)) { + // from_ty and to_ty are both arrays or vectors of types with the same bit size, + // so we can do an elementwise bitcast. + return try l.scalarizeBlockPayload(orig_inst, .ty_op); + } + + // Fallback path. Our strategy is to use an unsigned integer type as an intermediate + // "bag of bits" representation which can be manipulated by bitwise operations. + + const num_bits: u16 = @intCast(dest_ty.bitSize(zcu)); + assert(operand_ty.bitSize(zcu) == num_bits); + const uint_ty = try pt.intType(.unsigned, num_bits); + const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits)); + + var inst_buf: [39]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); - var res_block: Block = .init(&inst_buf); - { - const res_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, - }); - const int_ref = res_block.addBitCast(l, int_ty, orig_ty_op.operand); - const index_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = .ptr_usize }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = .zero_usize, + // First, convert `operand_ty` to `uint_ty` (`uN`). 
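As a plain-Zig sketch of the packing loop that follows (illustrative types only, not part of this change: a [3]u3 packed into a u9, so that element i lands at bit offset i * 3):

    // Walk the elements from last to first, shifting previously packed
    // bits left and OR-ing each element into the low bits.
    fn packU3(elems: [3]u3) u9 {
        var acc: u9 = 0;
        var i: usize = elems.len - 1;
        while (true) {
            acc = (acc << 3) | elems[i];
            if (i == 0) break;
            i -= 1;
        }
        return acc;
    }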
+
+    const uint_val: Air.Inst.Ref = uint_val: {
+        if (operand_legal) {
+            _ = main_block.stealCapacity(19);
+            break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand);
+        }
+
+        // %1 = block(uN, {
+        //   %2 = alloc(*usize)
+        //   %3 = alloc(*uN)
+        //   %4 = store(%2, <usize, N-1>)
+        //   %5 = store(%3, <uN, 0>)
+        //   %6 = loop({
+        //     %7 = load(%2)
+        //     %8 = array_elem_val(orig_operand, %7)
+        //     %9 = bitcast(uE, %8)
+        //     %10 = intcast(uN, %9)
+        //     %11 = load(%3)
+        //     %12 = shl_exact(%11, <uS, elem_bits>)
+        //     %13 = bit_or(%12, %10)
+        //     %14 = cmp_eq(%7, @zero_usize)
+        //     %15 = cond_br(%14, {
+        //       %16 = br(%1, %13)
+        //     }, {
+        //       %17 = store(%3, %13)
+        //       %18 = sub(%7, @one_usize)
+        //       %19 = store(%2, %18)
+        //       %20 = repeat(%6)
+        //     })
+        //   })
+        // })
+
+        const elem_bits = operand_ty.childType(zcu).bitSize(zcu);
+        const elem_bits_val = try pt.intValue(shift_ty, elem_bits);
+        const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
+
+        const uint_block_inst = main_block.add(l, .{
+            .tag = .block,
+            .data = .{ .ty_pl = .{
+                .ty = .fromType(uint_ty),
+                .payload = undefined,
             } },
         });
+        var uint_block: Block = .init(main_block.stealCapacity(19));
+
+        const index_ptr = uint_block.addTy(l, .alloc, .ptr_usize).toRef();
+        const result_ptr = uint_block.addTy(l, .alloc, try pt.singleMutPtrType(uint_ty)).toRef();
+        _ = uint_block.addBinOp(
+            l,
+            .store,
+            index_ptr,
+            .fromValue(try pt.intValue(.usize, operand_ty.arrayLen(zcu) - 1)),
+        );
+        _ = uint_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.intValue(uint_ty, 0)));
+
+        var loop: Loop = .init(l, &uint_block);
+        loop.block = .init(uint_block.stealRemainingCapacity());
+
+        const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
+        const raw_elem = loop.block.addBinOp(
+            l,
+            if (operand_ty.zigTypeTag(zcu) == .vector) .legalize_vec_elem_val else .array_elem_val,
+            ty_op.operand,
+            index_val,
+        ).toRef();
+        const elem_uint = loop.block.addBitCast(l, elem_uint_ty, raw_elem);
+        const elem_extended = loop.block.addTyOp(l, .intcast, uint_ty, elem_uint).toRef();
+        const old_result = loop.block.addTyOp(l, .load, uint_ty, result_ptr).toRef();
+        const shifted_result = loop.block.addBinOp(l, .shl_exact, old_result, .fromValue(elem_bits_val)).toRef();
+        const new_result = loop.block.addBinOp(l, .bit_or, shifted_result, elem_extended).toRef();
+
+        const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .zero_usize).toRef();
+        var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
+
+        condbr.then_block = .init(loop.block.stealRemainingCapacity());
+        condbr.then_block.addBr(l, uint_block_inst, new_result);
-    var loop: Loop = .init(l, &res_block);
-    loop.block = .init(res_block.stealRemainingCapacity());
-    {
-        const cur_index_inst = loop.block.add(l, .{
-            .tag = .load,
-            .data = .{ .ty_op = .{
-                .ty = .usize_type,
-                .operand = index_alloc_inst.toRef(),
-            } },
-        });
-        _ = loop.block.add(l, .{
-            .tag = .vector_store_elem,
-            .data = .{ .vector_store_elem = .{
-                .vector_ptr = res_alloc_inst.toRef(),
-                .payload = try l.addExtra(Air.Bin, .{
-                    .lhs = cur_index_inst.toRef(),
-                    .rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{
-                        .tag = .trunc,
-                        .data = .{ .ty_op = .{
-                            .ty = Air.internedToRef(elem_int_ty.toIntern()),
-                            .operand = loop.block.add(l, .{
-                                .tag = .shr,
-                                .data = .{ .bin_op = .{
-                                    .lhs = int_ref,
-                                    .rhs = loop.block.add(l, .{
-                                        .tag = .mul,
-                                        .data = .{ .bin_op = .{
-                                            .lhs = loop.block.add(l, .{
-                                                .tag = .intcast,
-                                                .data = .{ .ty_op = .{
-                                                    .ty = Air.internedToRef(shift_ty.toIntern()),
-                                                    .operand = cur_index_inst.toRef(),
-                                                } },
-                                            }).toRef(),
-                                            .rhs = try pt.intRef(shift_ty, elem_bits),
-                                        } },
-                                    }).toRef(),
-                                } },
-                            }).toRef(),
-                        } },
-                    }).toRef()),
-                }),
-            } },
-        });
+        condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
+        _ = condbr.else_block.addBinOp(l, .store, result_ptr, new_result);
+        const new_index_val = condbr.else_block.addBinOp(l, .sub, index_val, .one_usize).toRef();
+        _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
+        _ = condbr.else_block.add(l, .{
+            .tag = .repeat,
+            .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+        });
-        var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
-            l,
-            .lt,
-            cur_index_inst.toRef(),
-            try pt.intRef(.usize, res_ty.vectorLen(zcu) - 1),
-            .{},
-        )).toRef(), &loop.block, .{});
-        loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
-        {
-            _ = loop_cond_br.then_block.add(l, .{
-                .tag = .store,
-                .data = .{ .bin_op = .{
-                    .lhs = index_alloc_inst.toRef(),
-                    .rhs = loop_cond_br.then_block.add(l, .{
-                        .tag = .add,
-                        .data = .{ .bin_op = .{
-                            .lhs = cur_index_inst.toRef(),
-                            .rhs = .one_usize,
-                        } },
-                    }).toRef(),
+        try condbr.finish(l);
+        try loop.finish(l);
+
+        const inst_data = l.air_instructions.items(.data);
+        inst_data[@intFromEnum(uint_block_inst)].ty_pl.payload = try l.addBlockBody(uint_block.body());
+
+        break :uint_val uint_block_inst.toRef();
+    };
+
+    // Now convert `uint_ty` (`uN`) to `dest_ty`.
+
+    if (dest_legal) {
+        _ = main_block.stealCapacity(17);
+        const result = main_block.addBitCast(l, dest_ty, uint_val);
+        main_block.addBr(l, orig_inst, result);
+    } else {
+        // %1 = alloc(*usize)
+        // %2 = alloc(*@Vector(N, Result))
+        // %3 = store(%1, @zero_usize)
+        // %4 = loop({
+        //   %5 = load(%1)
+        //   %6 = mul(%5, <usize, elem_bits>)
+        //   %7 = intcast(uS, %6)
+        //   %8 = shr(uint_val, %7)
+        //   %9 = trunc(uE, %8)
+        //   %10 = bitcast(Result, %9)
+        //   %11 = legalize_vec_store_elem(%2, %5, %10)
+        //   %12 = cmp_eq(%5, <usize, N-1>)
+        //   %13 = cond_br(%12, {
+        //     %14 = load(%2)
+        //     %15 = br(%0, %14)
+        //   }, {
+        //     %16 = add(%5, @one_usize)
+        //     %17 = store(%1, %16)
+        //     %18 = repeat(%4)
+        //   })
+        // })
+        //
+        // The result might be an array, in which case `legalize_vec_store_elem`
+        // becomes `ptr_elem_ptr` followed by `store`.
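The unpacking loop described by this comment behaves, in plain Zig, roughly like the sketch below (illustrative types only, a u9 standing in for `uN` and u3 for the element type):

    // Inverse of the packing loop: element i is recovered from bit
    // offset i * elem_bits by shifting right and truncating.
    fn unpackU3(bits: u9) [3]u3 {
        var result: [3]u3 = undefined;
        for (&result, 0..) |*elem, i| {
            elem.* = @truncate(bits >> @intCast(i * 3));
        }
        return result;
    }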
+
+        const elem_ty = dest_ty.childType(zcu);
+        const elem_bits = elem_ty.bitSize(zcu);
+        const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
+
+        const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+        const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(dest_ty)).toRef();
+        _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
+
+        var loop: Loop = .init(l, &main_block);
+        loop.block = .init(main_block.stealRemainingCapacity());
+
+        const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
+        const bit_offset = loop.block.addBinOp(l, .mul, index_val, .fromValue(try pt.intValue(.usize, elem_bits))).toRef();
+        const casted_bit_offset = loop.block.addTyOp(l, .intcast, shift_ty, bit_offset).toRef();
+        const shifted_uint = loop.block.addBinOp(l, .shr, uint_val, casted_bit_offset).toRef();
+        const elem_uint = loop.block.addTyOp(l, .trunc, elem_uint_ty, shifted_uint).toRef();
+        const elem_val = loop.block.addBitCast(l, elem_ty, elem_uint);
+        switch (dest_ty.zigTypeTag(zcu)) {
+            .array => {
+                const elem_ptr = loop.block.add(l, .{
+                    .tag = .ptr_elem_ptr,
+                    .data = .{ .ty_pl = .{
+                        .ty = .fromType(try pt.singleMutPtrType(elem_ty)),
+                        .payload = try l.addExtra(Air.Bin, .{
+                            .lhs = result_ptr,
+                            .rhs = index_val,
+                        }),
+                    } },
+                }).toRef();
+                _ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
+            },
+            .vector => {
+                _ = loop.block.add(l, .{
+                    .tag = .legalize_vec_store_elem,
+                    .data = .{ .pl_op = .{
+                        .operand = result_ptr,
+                        .payload = try l.addExtra(Air.Bin, .{
+                            .lhs = index_val,
+                            .rhs = elem_val,
                         } },
                     });
-            _ = loop_cond_br.then_block.add(l, .{
-                .tag = .repeat,
-                .data = .{ .repeat = .{ .loop_inst = loop.inst } },
-            });
-        }
-        loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
-        _ = loop_cond_br.else_block.add(l, .{
-            .tag = .br,
-            .data = .{ .br = .{
-                .block_inst = orig_inst,
-                .operand = loop_cond_br.else_block.add(l, .{
-                    .tag = .load,
-                    .data = .{ .ty_op = .{
-                        .ty = Air.internedToRef(res_ty.toIntern()),
-                        .operand = res_alloc_inst.toRef(),
-                    } },
-                }).toRef(),
-            } },
-        });
-        try loop_cond_br.finish(l);
+                _ = loop.block.stealCapacity(1);
+            },
+            else => unreachable,
         }
+
+        const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, dest_ty.arrayLen(zcu) - 1))).toRef();
+
+        var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
+
+        condbr.then_block = .init(loop.block.stealRemainingCapacity());
+        const result_val = condbr.then_block.addTyOp(l, .load, dest_ty, result_ptr).toRef();
+        condbr.then_block.addBr(l, orig_inst, result_val);
+
+        condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
+        const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
+        _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
+        _ = condbr.else_block.add(l, .{
+            .tag = .repeat,
+            .data = .{ .repeat = .{ .loop_inst = loop.inst } },
+        });
+
+        try condbr.finish(l);
         try loop.finish(l);
     }
+
     return .{ .ty_pl = .{
-        .ty = Air.internedToRef(res_ty.toIntern()),
-        .payload = try l.addBlockBody(res_block.body()),
+        .ty = .fromType(dest_ty),
+        .payload = try l.addBlockBody(main_block.body()),
     } };
 }
 
 fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
@@ -1698,169 +1465,145 @@ fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!
     const zcu = pt.zcu;
 
     const orig = l.air_instructions.get(@intFromEnum(orig_inst));
-    const res_ty = l.typeOfIndex(orig_inst);
-    const wrapped_res_ty = res_ty.fieldType(0, zcu);
-    const wrapped_res_scalar_ty = wrapped_res_ty.childType(zcu);
-    const res_len = wrapped_res_ty.vectorLen(zcu);
+    const orig_operands = l.extraData(Air.Bin, orig.data.ty_pl.payload).data;
+
+    const vec_tuple_ty = l.typeOfIndex(orig_inst);
+    const vec_int_ty = vec_tuple_ty.fieldType(0, zcu);
+    const vec_overflow_ty = vec_tuple_ty.fieldType(1, zcu);
+
+    assert(l.typeOf(orig_operands.lhs).toIntern() == vec_int_ty.toIntern());
+    if (orig.tag != .shl_with_overflow) {
+        assert(l.typeOf(orig_operands.rhs).toIntern() == vec_int_ty.toIntern());
+    }
+
+    const scalar_int_ty = vec_int_ty.childType(zcu);
+    const scalar_tuple_ty = try pt.overflowArithmeticTupleType(scalar_int_ty);
+
+    // %1 = block(struct { @Vector(N, Int), @Vector(N, u1) }, {
+    //   %2 = alloc(*usize)
+    //   %3 = alloc(*struct { @Vector(N, Int), @Vector(N, u1) })
+    //   %4 = struct_field_ptr_index_0(*@Vector(N, Int), %3)
+    //   %5 = struct_field_ptr_index_1(*@Vector(N, u1), %3)
+    //   %6 = store(%2, @zero_usize)
+    //   %7 = loop({
+    //     %8 = load(%2)
+    //     %9 = legalize_vec_elem_val(orig_lhs, %8)
+    //     %10 = legalize_vec_elem_val(orig_rhs, %8)
+    //     %11 = ???_with_overflow(struct { Int, u1 }, %9, %10)
+    //     %12 = struct_field_val(%11, 0)
+    //     %13 = struct_field_val(%11, 1)
+    //     %14 = legalize_vec_store_elem(%4, %8, %12)
+    //     %15 = legalize_vec_store_elem(%5, %8, %13)
+    //     %16 = cmp_eq(%8, <usize, N-1>)
+    //     %17 = cond_br(%16, {
+    //       %18 = load(%3)
+    //       %19 = br(%1, %18)
+    //     }, {
+    //       %20 = add(%8, @one_usize)
+    //       %21 = store(%2, %20)
+    //       %22 = repeat(%7)
+    //     })
+    //   })
+    // })
+
+    const elems_len = vec_int_ty.vectorLen(zcu);
 
     var inst_buf: [21]Air.Inst.Index = undefined;
+    var main_block: Block = .init(&inst_buf);
     try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
 
-    var res_block: Block = .init(&inst_buf);
-    {
-        const res_alloc_inst = res_block.add(l, .{
-            .tag = .alloc,
-            .data = .{ .ty = try pt.singleMutPtrType(res_ty) },
-        });
-        const ptr_wrapped_res_inst = res_block.add(l, .{
-            .tag = .struct_field_ptr_index_0,
-            .data = .{ .ty_op = .{
-                .ty = Air.internedToRef((try pt.singleMutPtrType(wrapped_res_ty)).toIntern()),
-                .operand = res_alloc_inst.toRef(),
-            } },
-        });
-        const ptr_overflow_res_inst = res_block.add(l, .{
-            .tag = .struct_field_ptr_index_1,
-            .data = .{ .ty_op = .{
-                .ty = Air.internedToRef((try pt.singleMutPtrType(res_ty.fieldType(1, zcu))).toIntern()),
-                .operand = res_alloc_inst.toRef(),
-            } },
-        });
-        const index_alloc_inst = res_block.add(l, .{
-            .tag = .alloc,
-            .data = .{ .ty = .ptr_usize },
-        });
-        _ = res_block.add(l, .{
-            .tag = .store,
-            .data = .{ .bin_op = .{
-                .lhs = index_alloc_inst.toRef(),
-                .rhs = .zero_usize,
-            } },
-        });
+    const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
+    const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(vec_tuple_ty)).toRef();
+    const result_int_ptr = main_block.addTyOp(
+        l,
+        .struct_field_ptr_index_0,
+        try pt.singleMutPtrType(vec_int_ty),
+        result_ptr,
+    ).toRef();
+    const result_overflow_ptr = main_block.addTyOp(
+        l,
+        .struct_field_ptr_index_1,
+        try pt.singleMutPtrType(vec_overflow_ty),
+        result_ptr,
+    ).toRef();
+
+    _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
+
+    var loop: Loop = .init(l, &main_block);
+    loop.block = .init(main_block.stealRemainingCapacity());
+
+    const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
+    const lhs =
loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.lhs, index_val).toRef(); + const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.rhs, index_val).toRef(); + const elem_result = loop.block.add(l, .{ + .tag = orig.tag, + .data = .{ .ty_pl = .{ + .ty = .fromType(scalar_tuple_ty), + .payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }), + } }, + }).toRef(); + const int_elem = loop.block.add(l, .{ + .tag = .struct_field_val, + .data = .{ .ty_pl = .{ + .ty = .fromType(scalar_int_ty), + .payload = try l.addExtra(Air.StructField, .{ + .struct_operand = elem_result, + .field_index = 0, + }), + } }, + }).toRef(); + const overflow_elem = loop.block.add(l, .{ + .tag = .struct_field_val, + .data = .{ .ty_pl = .{ + .ty = .u1_type, + .payload = try l.addExtra(Air.StructField, .{ + .struct_operand = elem_result, + .field_index = 1, + }), + } }, + }).toRef(); + _ = loop.block.add(l, .{ + .tag = .legalize_vec_store_elem, + .data = .{ .pl_op = .{ + .operand = result_int_ptr, + .payload = try l.addExtra(Air.Bin, .{ + .lhs = index_val, + .rhs = int_elem, + }), + } }, + }); + _ = loop.block.add(l, .{ + .tag = .legalize_vec_store_elem, + .data = .{ .pl_op = .{ + .operand = result_overflow_ptr, + .payload = try l.addExtra(Air.Bin, .{ + .lhs = index_val, + .rhs = overflow_elem, + }), + } }, + }); - var loop: Loop = .init(l, &res_block); - loop.block = .init(res_block.stealRemainingCapacity()); - { - const cur_index_inst = loop.block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = .usize_type, - .operand = index_alloc_inst.toRef(), - } }, - }); - const extra = l.extraData(Air.Bin, orig.data.ty_pl.payload).data; - const res_elem = loop.block.add(l, .{ - .tag = orig.tag, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef(try zcu.intern_pool.getTupleType(zcu.gpa, pt.tid, .{ - .types = &.{ wrapped_res_scalar_ty.toIntern(), .u1_type }, - .values = &(.{.none} ** 2), - })), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.lhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - .rhs = loop.block.add(l, .{ - .tag = .array_elem_val, - .data = .{ .bin_op = .{ - .lhs = extra.rhs, - .rhs = cur_index_inst.toRef(), - } }, - }).toRef(), - }), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = ptr_overflow_res_inst.toRef(), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = cur_index_inst.toRef(), - .rhs = loop.block.add(l, .{ - .tag = .struct_field_val, - .data = .{ .ty_pl = .{ - .ty = .u1_type, - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = res_elem.toRef(), - .field_index = 1, - }), - } }, - }).toRef(), - }), - } }, - }); - _ = loop.block.add(l, .{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = ptr_wrapped_res_inst.toRef(), - .payload = try l.addExtra(Air.Bin, .{ - .lhs = cur_index_inst.toRef(), - .rhs = loop.block.add(l, .{ - .tag = .struct_field_val, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef(wrapped_res_scalar_ty.toIntern()), - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = res_elem.toRef(), - .field_index = 0, - }), - } }, - }).toRef(), - }), - } }, - }); + const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, elems_len - 1))).toRef(); + var condbr: CondBr = .init(l, is_end_val, &loop.block, .{}); + + condbr.then_block = .init(loop.block.stealRemainingCapacity()); + const result_val = 
condbr.then_block.addTyOp(l, .load, vec_tuple_ty, result_ptr).toRef(); + condbr.then_block.addBr(l, orig_inst, result_val); + + condbr.else_block = .init(condbr.then_block.stealRemainingCapacity()); + const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef(); + _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val); + _ = condbr.else_block.add(l, .{ + .tag = .repeat, + .data = .{ .repeat = .{ .loop_inst = loop.inst } }, + }); + + try condbr.finish(l); + try loop.finish(l); - var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp( - l, - .lt, - cur_index_inst.toRef(), - try pt.intRef(.usize, res_len - 1), - .{}, - )).toRef(), &loop.block, .{}); - loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity()); - { - _ = loop_cond_br.then_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = index_alloc_inst.toRef(), - .rhs = loop_cond_br.then_block.add(l, .{ - .tag = .add, - .data = .{ .bin_op = .{ - .lhs = cur_index_inst.toRef(), - .rhs = .one_usize, - } }, - }).toRef(), - } }, - }); - _ = loop_cond_br.then_block.add(l, .{ - .tag = .repeat, - .data = .{ .repeat = .{ .loop_inst = loop.inst } }, - }); - } - loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity()); - _ = loop_cond_br.else_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = loop_cond_br.else_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .operand = res_alloc_inst.toRef(), - } }, - }).toRef(), - } }, - }); - try loop_cond_br.finish(l); - } - try loop.finish(l); - } return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), + .ty = .fromType(vec_tuple_ty), + .payload = try l.addBlockBody(main_block.body()), } }; } @@ -2047,7 +1790,7 @@ fn safeIntFromFloatBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, optimiz // We emit 9 instructions in the worst case. 
var inst_buf: [9]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); + try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len); var main_block: Block = .init(&inst_buf); // This check is a bit annoying because of floating-point rounding and the fact that this @@ -2231,37 +1974,6 @@ fn safeArithmeticBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, overflow_ } }; } -fn expandBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { - const pt = l.pt; - const zcu = pt.zcu; - const ip = &zcu.intern_pool; - - const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op; - const res_ty = orig_ty_op.ty.toType(); - const res_ty_key = ip.indexToKey(res_ty.toIntern()); - const operand_ty = l.typeOf(orig_ty_op.operand); - const operand_ty_key = ip.indexToKey(operand_ty.toIntern()); - _ = res_ty_key; - _ = operand_ty_key; - - var inst_buf: [1]Air.Inst.Index = undefined; - try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); - - var res_block: Block = .init(&inst_buf); - { - _ = res_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = try pt.undefRef(res_ty), - } }, - }); - } - return .{ .ty_pl = .{ - .ty = Air.internedToRef(res_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), - } }; -} fn packedLoadBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data { const pt = l.pt; const zcu = pt.zcu; @@ -2431,89 +2143,73 @@ fn packedStructFieldValBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Err const field_ty = orig_ty_pl.ty.toType(); const agg_ty = l.typeOf(orig_extra.struct_operand); + const agg_bits: u16 = @intCast(agg_ty.bitSize(zcu)); + const bit_offset = zcu.structPackedFieldBitOffset(zcu.typeToStruct(agg_ty).?, orig_extra.field_index); + + const agg_int_ty = try pt.intType(.unsigned, agg_bits); + const field_int_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu))); + + const agg_shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, agg_bits)); + const bit_offset_ref: Air.Inst.Ref = .fromValue(try pt.intValue(agg_shift_ty, bit_offset)); + var inst_buf: [5]Air.Inst.Index = undefined; + var main_block: Block = .init(&inst_buf); try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len); - var res_block: Block = .init(&inst_buf); - { - const agg_alloc_inst = res_block.add(l, .{ - .tag = .alloc, - .data = .{ .ty = try pt.singleMutPtrType(agg_ty) }, - }); - _ = res_block.add(l, .{ - .tag = .store, - .data = .{ .bin_op = .{ - .lhs = agg_alloc_inst.toRef(), - .rhs = orig_extra.struct_operand, - } }, - }); - _ = res_block.add(l, .{ - .tag = .br, - .data = .{ .br = .{ - .block_inst = orig_inst, - .operand = res_block.add(l, .{ - .tag = .load, - .data = .{ .ty_op = .{ - .ty = Air.internedToRef(field_ty.toIntern()), - .operand = (try res_block.addStructFieldPtr(l, agg_alloc_inst.toRef(), orig_extra.field_index)).toRef(), - } }, - }).toRef(), - } }, - }); - } + const agg_int = main_block.addBitCast(l, agg_int_ty, orig_extra.struct_operand); + const shifted_agg_int = main_block.addBinOp(l, .shr, agg_int, bit_offset_ref).toRef(); + const field_int = main_block.addTyOp(l, .trunc, field_int_ty, shifted_agg_int).toRef(); + const field_val = main_block.addBitCast(l, field_ty, field_int); + main_block.addBr(l, orig_inst, field_val); + return .{ .ty_pl = .{ - .ty = Air.internedToRef(field_ty.toIntern()), - .payload = try l.addBlockBody(res_block.body()), + .ty = .fromType(field_ty), + .payload = 
try l.addBlockBody(main_block.body()),
     } };
 }
 
 fn packedAggregateInitBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
     const pt = l.pt;
     const zcu = pt.zcu;
+    const gpa = zcu.gpa;
 
     const orig_ty_pl = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_pl;
-    const field_ty = orig_ty_pl.ty.toType();
     const agg_ty = orig_ty_pl.ty.toType();
     const agg_field_count = agg_ty.structFieldCount(zcu);
 
-    const ExpectedContents = [1 + 2 * 32 + 2]Air.Inst.Index;
-    var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
-        std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa);
-    const gpa = stack.get();
+    var sfba_state = std.heap.stackFallback(@sizeOf([4 * 32 + 2]Air.Inst.Index), gpa);
+    const sfba = sfba_state.get();
 
-    const inst_buf = try gpa.alloc(Air.Inst.Index, 1 + 2 * agg_field_count + 2);
-    defer gpa.free(inst_buf);
-    try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
+    const inst_buf = try sfba.alloc(Air.Inst.Index, 4 * agg_field_count + 2);
+    defer sfba.free(inst_buf);
 
-    var res_block: Block = .init(inst_buf);
-    {
-        const agg_alloc_inst = res_block.add(l, .{
-            .tag = .alloc,
-            .data = .{ .ty = try pt.singleMutPtrType(agg_ty) },
-        });
-        for (0..agg_field_count, orig_ty_pl.payload..) |field_index, extra_index| _ = res_block.add(l, .{
-            .tag = .store,
-            .data = .{ .bin_op = .{
-                .lhs = (try res_block.addStructFieldPtr(l, agg_alloc_inst.toRef(), field_index)).toRef(),
-                .rhs = @enumFromInt(l.air_extra.items[extra_index]),
-            } },
-        });
-        _ = res_block.add(l, .{
-            .tag = .br,
-            .data = .{ .br = .{
-                .block_inst = orig_inst,
-                .operand = res_block.add(l, .{
-                    .tag = .load,
-                    .data = .{ .ty_op = .{
-                        .ty = Air.internedToRef(field_ty.toIntern()),
-                        .operand = agg_alloc_inst.toRef(),
-                    } },
-                }).toRef(),
-            } },
-        });
+    var main_block: Block = .init(inst_buf);
+    try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
+
+    const num_bits: u16 = @intCast(agg_ty.bitSize(zcu));
+    const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits));
+    const uint_ty = try pt.intType(.unsigned, num_bits);
+    var cur_uint: Air.Inst.Ref = .fromValue(try pt.intValue(uint_ty, 0));
+
+    var field_idx = agg_field_count;
+    while (field_idx > 0) {
+        field_idx -= 1;
+        const field_ty = agg_ty.fieldType(field_idx, zcu);
+        const field_uint_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu)));
+        const field_bit_size_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, field_ty.bitSize(zcu)));
+        const field_val: Air.Inst.Ref = @enumFromInt(l.air_extra.items[orig_ty_pl.payload + field_idx]);
+
+        const shifted = main_block.addBinOp(l, .shl_exact, cur_uint, field_bit_size_ref).toRef();
+        const field_as_uint = main_block.addBitCast(l, field_uint_ty, field_val);
+        const field_extended = main_block.addTyOp(l, .intcast, uint_ty, field_as_uint).toRef();
+        cur_uint = main_block.addBinOp(l, .bit_or, shifted, field_extended).toRef();
     }
+
+    const result = main_block.addBitCast(l, agg_ty, cur_uint);
+    main_block.addBr(l, orig_inst, result);
+
     return .{ .ty_pl = .{
-        .ty = Air.internedToRef(field_ty.toIntern()),
-        .payload = try l.addBlockBody(res_block.body()),
+        .ty = .fromType(agg_ty),
+        .payload = try l.addBlockBody(main_block.body()),
     } };
 }
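The rewritten packed aggregate init above amounts, in plain Zig, to the following sketch (an illustrative packed struct, not taken from this change; fields are folded in reverse declaration order so that the first field lands in the lowest bits):

    const Packed = packed struct { a: u3, b: u5, c: u8 };

    // Shift each field's bits into an unsigned backing integer, then
    // bitcast the integer to the packed struct type.
    fn initPacked(a: u3, b: u5, c: u8) Packed {
        var acc: u16 = 0;
        acc = (acc << 8) | c;
        acc = (acc << 5) | b;
        acc = (acc << 3) | a;
        return @bitCast(acc);
    }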
@@ -2571,6 +2267,36 @@ const Block = struct {
         b.len += 1;
         return inst;
     }
+    fn addBr(b: *Block, l: *Legalize, target: Air.Inst.Index, operand: Air.Inst.Ref) void {
+        _ = b.add(l, .{
+            .tag = .br,
+            .data = .{ .br = .{ .block_inst = target, .operand = operand } },
+        });
+    }
+    fn addTy(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type) Air.Inst.Index {
+        return b.add(l, .{ .tag = tag, .data = .{ .ty = ty } });
+    }
+    fn addBinOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) Air.Inst.Index {
+        return b.add(l, .{
+            .tag = tag,
+            .data = .{ .bin_op = .{ .lhs = lhs, .rhs = rhs } },
+        });
+    }
+    fn addUnOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, operand: Air.Inst.Ref) Air.Inst.Index {
+        return b.add(l, .{
+            .tag = tag,
+            .data = .{ .un_op = operand },
+        });
+    }
+    fn addTyOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type, operand: Air.Inst.Ref) Air.Inst.Index {
+        return b.add(l, .{
+            .tag = tag,
+            .data = .{ .ty_op = .{
+                .ty = .fromType(ty),
+                .operand = operand,
+            } },
+        });
+    }
 
     /// Adds the code to call the panic handler `panic_id`. This is usually `.call` then `.unreach`,
     /// but if `Zcu.Feature.panic_fn` is unsupported, we lower to `.trap` instead.
@@ -2625,14 +2351,27 @@ const Block = struct {
             } },
         });
     }
+        return addCmpScalar(b, l, op, lhs, rhs, opts.optimized);
+    }
+
+    /// Similar to `addCmp`, but for scalars only. Unlike `addCmp`, this function is
+    /// infallible, because it doesn't need to add entries to `extra`.
+    fn addCmpScalar(
+        b: *Block,
+        l: *Legalize,
+        op: std.math.CompareOperator,
+        lhs: Air.Inst.Ref,
+        rhs: Air.Inst.Ref,
+        optimized: bool,
+    ) Air.Inst.Index {
         return b.add(l, .{
             .tag = switch (op) {
-                .lt => if (opts.optimized) .cmp_lt_optimized else .cmp_lt,
-                .lte => if (opts.optimized) .cmp_lte_optimized else .cmp_lte,
-                .eq => if (opts.optimized) .cmp_eq_optimized else .cmp_eq,
-                .gte => if (opts.optimized) .cmp_gte_optimized else .cmp_gte,
-                .gt => if (opts.optimized) .cmp_gt_optimized else .cmp_gt,
-                .neq => if (opts.optimized) .cmp_neq_optimized else .cmp_neq,
+                .lt => if (optimized) .cmp_lt_optimized else .cmp_lt,
+                .lte => if (optimized) .cmp_lte_optimized else .cmp_lte,
+                .eq => if (optimized) .cmp_eq_optimized else .cmp_eq,
+                .gte => if (optimized) .cmp_gte_optimized else .cmp_gte,
+                .gt => if (optimized) .cmp_gt_optimized else .cmp_gt,
+                .neq => if (optimized) .cmp_neq_optimized else .cmp_neq,
             },
             .data = .{ .bin_op = .{
                 .lhs = lhs,
@@ -2641,93 +2380,6 @@ const Block = struct {
         });
     }
 
-    /// Adds a `struct_field_ptr*` instruction to `b`. This is a fairly thin wrapper around `add`
-    /// that selects the optimized instruction encoding to use, although it does compute the
-    /// proper field pointer type.
- fn addStructFieldPtr( - b: *Block, - l: *Legalize, - struct_operand: Air.Inst.Ref, - field_index: usize, - ) Error!Air.Inst.Index { - const pt = l.pt; - const zcu = pt.zcu; - - const agg_ptr_ty = l.typeOf(struct_operand); - const agg_ptr_info = agg_ptr_ty.ptrInfo(zcu); - const agg_ty: Type = .fromInterned(agg_ptr_info.child); - const agg_ptr_align = switch (agg_ptr_info.flags.alignment) { - .none => agg_ty.abiAlignment(zcu), - else => |agg_ptr_align| agg_ptr_align, - }; - const agg_layout = agg_ty.containerLayout(zcu); - const field_ty = agg_ty.fieldType(field_index, zcu); - var field_ptr_info: InternPool.Key.PtrType = .{ - .child = field_ty.toIntern(), - .flags = .{ - .is_const = agg_ptr_info.flags.is_const, - .is_volatile = agg_ptr_info.flags.is_volatile, - .address_space = agg_ptr_info.flags.address_space, - }, - }; - field_ptr_info.flags.alignment = field_ptr_align: switch (agg_layout) { - .auto => agg_ty.fieldAlignment(field_index, zcu).min(agg_ptr_align), - .@"extern" => switch (agg_ty.zigTypeTag(zcu)) { - else => unreachable, - .@"struct" => .fromLog2Units(@min( - agg_ptr_align.toLog2Units(), - @ctz(agg_ty.structFieldOffset(field_index, zcu)), - )), - .@"union" => agg_ptr_align, - }, - .@"packed" => switch (agg_ty.zigTypeTag(zcu)) { - else => unreachable, - .@"struct" => { - const packed_offset = agg_ty.packedStructFieldPtrInfo(agg_ptr_ty, @intCast(field_index), pt); - field_ptr_info.packed_offset = packed_offset; - break :field_ptr_align agg_ptr_align; - }, - .@"union" => { - field_ptr_info.packed_offset = .{ - .host_size = switch (agg_ptr_info.packed_offset.host_size) { - 0 => @intCast(agg_ty.abiSize(zcu)), - else => |host_size| host_size, - }, - .bit_offset = agg_ptr_info.packed_offset.bit_offset, - }; - break :field_ptr_align agg_ptr_align; - }, - }, - }; - const field_ptr_ty = try pt.ptrType(field_ptr_info); - const field_ptr_ty_ref = Air.internedToRef(field_ptr_ty.toIntern()); - return switch (field_index) { - inline 0...3 => |ct_field_index| b.add(l, .{ - .tag = switch (ct_field_index) { - 0 => .struct_field_ptr_index_0, - 1 => .struct_field_ptr_index_1, - 2 => .struct_field_ptr_index_2, - 3 => .struct_field_ptr_index_3, - else => comptime unreachable, - }, - .data = .{ .ty_op = .{ - .ty = field_ptr_ty_ref, - .operand = struct_operand, - } }, - }), - else => b.add(l, .{ - .tag = .struct_field_ptr, - .data = .{ .ty_pl = .{ - .ty = field_ptr_ty_ref, - .payload = try l.addExtra(Air.StructField, .{ - .struct_operand = struct_operand, - .field_index = @intCast(field_index), - }), - } }, - }), - }; - } - /// Adds a `bitcast` instruction to `b`. This is a thin wrapper that omits the instruction for /// no-op casts. fn addBitCast( @@ -2774,31 +2426,6 @@ const Block = struct { } }; -const Result = struct { - inst: Air.Inst.Index, - block: Block, - - /// The return value has `block` initialized to `undefined`; it is the caller's reponsibility - /// to initialize it. 
- fn init(l: *Legalize, ty: Type, parent_block: *Block) Result { - return .{ - .inst = parent_block.add(l, .{ - .tag = .block, - .data = .{ .ty_pl = .{ - .ty = Air.internedToRef(ty.toIntern()), - .payload = undefined, - } }, - }), - .block = undefined, - }; - } - - fn finish(res: Result, l: *Legalize) Error!void { - const data = &l.air_instructions.items(.data)[@intFromEnum(res.inst)]; - data.ty_pl.payload = try l.addBlockBody(res.block.body()); - } -}; - const Loop = struct { inst: Air.Inst.Index, block: Block, diff --git a/src/Air/Liveness.zig b/src/Air/Liveness.zig index 58169730e880..c60ece5e4f8d 100644 --- a/src/Air/Liveness.zig +++ b/src/Air/Liveness.zig @@ -458,17 +458,12 @@ fn analyzeInst( .memset_safe, .memcpy, .memmove, + .legalize_vec_elem_val, => { const o = inst_datas[@intFromEnum(inst)].bin_op; return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none }); }, - .vector_store_elem => { - const o = inst_datas[@intFromEnum(inst)].vector_store_elem; - const extra = a.air.extraData(Air.Bin, o.payload).data; - return analyzeOperands(a, pass, data, inst, .{ o.vector_ptr, extra.lhs, extra.rhs }); - }, - .arg, .alloc, .ret_ptr, @@ -775,6 +770,12 @@ fn analyzeInst( const pl_op = inst_datas[@intFromEnum(inst)].pl_op; return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, .none, .none }); }, + + .legalize_vec_store_elem => { + const pl_op = inst_datas[@intFromEnum(inst)].pl_op; + const bin = a.air.extraData(Air.Bin, pl_op.payload).data; + return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, bin.lhs, bin.rhs }); + }, } } diff --git a/src/Air/Liveness/Verify.zig b/src/Air/Liveness/Verify.zig index a1cce26a64f3..f522e1367e49 100644 --- a/src/Air/Liveness/Verify.zig +++ b/src/Air/Liveness/Verify.zig @@ -272,6 +272,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .memset_safe, .memcpy, .memmove, + .legalize_vec_elem_val, => { const bin_op = data[@intFromEnum(inst)].bin_op; try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -322,11 +323,6 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { const extra = self.air.extraData(Air.Bin, pl_op.payload).data; try self.verifyInstOperands(inst, .{ extra.lhs, extra.rhs, pl_op.operand }); }, - .vector_store_elem => { - const vector_store_elem = data[@intFromEnum(inst)].vector_store_elem; - const extra = self.air.extraData(Air.Bin, vector_store_elem.payload).data; - try self.verifyInstOperands(inst, .{ vector_store_elem.vector_ptr, extra.lhs, extra.rhs }); - }, .cmpxchg_strong, .cmpxchg_weak, => { @@ -582,6 +578,11 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { try self.verifyInst(inst); }, + .legalize_vec_store_elem => { + const pl_op = data[@intFromEnum(inst)].pl_op; + const bin = self.air.extraData(Air.Bin, pl_op.payload).data; + try self.verifyInstOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs }); + }, } } } diff --git a/src/Air/print.zig b/src/Air/print.zig index 73cf2ed9b31e..3324055dc70a 100644 --- a/src/Air/print.zig +++ b/src/Air/print.zig @@ -171,6 +171,7 @@ const Writer = struct { .memmove, .memset, .memset_safe, + .legalize_vec_elem_val, => try w.writeBinOp(s, inst), .is_null, @@ -330,8 +331,8 @@ const Writer = struct { .shuffle_two => try w.writeShuffleTwo(s, inst), .reduce, .reduce_optimized => try w.writeReduce(s, inst), .cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst), - .vector_store_elem => try w.writeVectorStoreElem(s, inst), .runtime_nav_ptr => try w.writeRuntimeNavPtr(s, inst), + 
.legalize_vec_store_elem => try w.writeLegalizeVecStoreElem(s, inst), .work_item_id, .work_group_size, @@ -509,6 +510,18 @@ const Writer = struct { try w.writeOperand(s, inst, 2, pl_op.operand); } + fn writeLegalizeVecStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void { + const pl_op = w.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const bin = w.air.extraData(Air.Bin, pl_op.payload).data; + + try w.writeOperand(s, inst, 0, pl_op.operand); + try s.writeAll(", "); + try w.writeOperand(s, inst, 1, bin.lhs); + try s.writeAll(", "); + try w.writeOperand(s, inst, 2, bin.rhs); + try s.writeAll(", "); + } + fn writeShuffleOne(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void { const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst); try w.writeType(s, unwrapped.result_ty); @@ -576,17 +589,6 @@ const Writer = struct { try w.writeOperand(s, inst, 1, extra.rhs); } - fn writeVectorStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void { - const data = w.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem; - const extra = w.air.extraData(Air.VectorCmp, data.payload).data; - - try w.writeOperand(s, inst, 0, data.vector_ptr); - try s.writeAll(", "); - try w.writeOperand(s, inst, 1, extra.lhs); - try s.writeAll(", "); - try w.writeOperand(s, inst, 2, extra.rhs); - } - fn writeRuntimeNavPtr(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void { const ip = &w.pt.zcu.intern_pool; const ty_nav = w.air.instructions.items(.data)[@intFromEnum(inst)].ty_nav; diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index 44669b82df87..d90550982d83 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -88,6 +88,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .atomic_store_monotonic, .atomic_store_release, .atomic_store_seq_cst, + .legalize_vec_elem_val, => { if (!checkRef(data.bin_op.lhs, zcu)) return false; if (!checkRef(data.bin_op.rhs, zcu)) return false; @@ -316,19 +317,13 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { if (!checkRef(data.prefetch.ptr, zcu)) return false; }, - .vector_store_elem => { - const bin = air.extraData(Air.Bin, data.vector_store_elem.payload).data; - if (!checkRef(data.vector_store_elem.vector_ptr, zcu)) return false; - if (!checkRef(bin.lhs, zcu)) return false; - if (!checkRef(bin.rhs, zcu)) return false; - }, - .runtime_nav_ptr => { if (!checkType(.fromInterned(data.ty_nav.ty), zcu)) return false; }, .select, .mul_add, + .legalize_vec_store_elem, => { const bin = air.extraData(Air.Bin, data.pl_op.payload).data; if (!checkRef(data.pl_op.operand, zcu)) return false; diff --git a/src/InternPool.zig b/src/InternPool.zig index 3bee7a3f941d..e53caf382f8d 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -2104,7 +2104,6 @@ pub const Key = union(enum) { pub const VectorIndex = enum(u16) { none = std.math.maxInt(u16), - runtime = std.math.maxInt(u16) - 1, _, }; @@ -3739,10 +3738,8 @@ pub const LoadedStructType = struct { return s.field_inits.get(ip)[i]; } - /// Returns `none` in the case the struct is a tuple. 
- pub fn fieldName(s: LoadedStructType, ip: *const InternPool, i: usize) OptionalNullTerminatedString { - if (s.field_names.len == 0) return .none; - return s.field_names.get(ip)[i].toOptional(); + pub fn fieldName(s: LoadedStructType, ip: *const InternPool, i: usize) NullTerminatedString { + return s.field_names.get(ip)[i]; } pub fn fieldIsComptime(s: LoadedStructType, ip: *const InternPool, i: usize) bool { diff --git a/src/Sema.zig b/src/Sema.zig index af342a0d046d..7974b4791388 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -15919,24 +15919,30 @@ fn zirOverflowArithmetic( }, .mul_with_overflow => { // If either of the arguments is zero, the result is zero and no overflow occured. - // If either of the arguments is one, the result is the other and no overflow occured. - // Otherwise, if either of the arguments is undefined, both results are undefined. - const scalar_one = try pt.intValue(dest_ty.scalarType(zcu), 1); if (maybe_lhs_val) |lhs_val| { - if (!lhs_val.isUndef(zcu)) { - if (try lhs_val.compareAllWithZeroSema(.eq, pt)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; - } else if (try sema.compareAll(lhs_val, .eq, try sema.splat(dest_ty, scalar_one), dest_ty)) { - break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; - } + if (!lhs_val.isUndef(zcu) and try lhs_val.compareAllWithZeroSema(.eq, pt)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; } } - if (maybe_rhs_val) |rhs_val| { - if (!rhs_val.isUndef(zcu)) { - if (try rhs_val.compareAllWithZeroSema(.eq, pt)) { + if (!rhs_val.isUndef(zcu) and try rhs_val.compareAllWithZeroSema(.eq, pt)) { + break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; + } + } + // If either of the arguments is one, the result is the other and no overflow occured. + const dest_scalar_ty = dest_ty.scalarType(zcu); + const dest_scalar_int = dest_scalar_ty.intInfo(zcu); + // We could still be working with i1, where '1' is not a legal value! + if (!(dest_scalar_int.bits == 1 and dest_scalar_int.signedness == .signed)) { + const scalar_one = try pt.intValue(dest_scalar_ty, 1); + const vec_one = try sema.splat(dest_ty, scalar_one); + if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) { break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs }; - } else if (try sema.compareAll(rhs_val, .eq, try sema.splat(dest_ty, scalar_one), dest_ty)) { + } + } + if (maybe_rhs_val) |rhs_val| { + if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) { break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs }; } } @@ -15947,7 +15953,6 @@ fn zirOverflowArithmetic( if (lhs_val.isUndef(zcu) or rhs_val.isUndef(zcu)) { break :result .{ .overflow_bit = .undef, .wrapped = .undef }; } - const result = try arith.mulWithOverflow(sema, dest_ty, lhs_val, rhs_val); break :result .{ .overflow_bit = result.overflow_bit, .wrapped = result.wrapped_result }; } @@ -17751,10 +17756,7 @@ fn zirTypeInfo(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai try ty.resolveStructFieldInits(pt); for (struct_field_vals, 0..) 
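// On the i1 guard in the overflow-arithmetic fold above: i1 represents only
// 0 and -1, so a multiplicative identity does not exist, and constructing
// `pt.intValue(dest_scalar_ty, 1)` would trip the new fit assertions added to
// `intValue_i64` (see src/Zcu/PerThread.zig below). What remains is plain
// wraparound, sketched here with hypothetical values:
//
//     const r = @mulWithOverflow(@as(i1, -1), @as(i1, -1));
//     // (-1) * (-1) == 1, which i1 cannot hold: r[0] wraps to -1, r[1] == 1.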
|*field_val, field_index| { - const field_name = if (struct_type.fieldName(ip, field_index).unwrap()) |field_name| - field_name - else - try ip.getOrPutStringFmt(gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls); + const field_name = struct_type.fieldName(ip, field_index); const field_name_len = field_name.length(ip); const field_ty: Type = .fromInterned(struct_type.field_types.get(ip)[field_index]); const field_init = struct_type.fieldInit(ip, field_index); @@ -28347,6 +28349,10 @@ fn elemPtrArray( break :o index; } else null; + if (offset == null and array_ty.zigTypeTag(zcu) == .vector) { + return sema.fail(block, elem_index_src, "vector index not comptime known", .{}); + } + const elem_ptr_ty = try array_ptr_ty.elemPtrType(offset, pt); if (maybe_undef_array_ptr_val) |array_ptr_val| { @@ -28364,10 +28370,6 @@ fn elemPtrArray( try sema.validateRuntimeValue(block, array_ptr_src, array_ptr); } - if (offset == null and array_ty.zigTypeTag(zcu) == .vector) { - return sema.fail(block, elem_index_src, "vector index not comptime known", .{}); - } - // Runtime check is only needed if unable to comptime check. if (oob_safety and block.wantSafety() and offset == null) { const len_inst = try pt.intRef(.usize, array_len); @@ -30399,22 +30401,6 @@ fn storePtr2( const is_ret = air_tag == .ret_ptr; - // Detect if we are storing an array operand to a bitcasted vector pointer. - // If so, we instead reach through the bitcasted pointer to the vector pointer, - // bitcast the array operand to a vector, and then lower this as a store of - // a vector value to a vector pointer. This generally results in better code, - // as well as working around an LLVM bug: - // https://github.com/ziglang/zig/issues/11154 - if (sema.obtainBitCastedVectorPtr(ptr)) |vector_ptr| { - const vector_ty = sema.typeOf(vector_ptr).childType(zcu); - const vector = sema.coerceExtra(block, vector_ty, uncasted_operand, operand_src, .{ .is_ret = is_ret }) catch |err| switch (err) { - error.NotCoercible => unreachable, - else => |e| return e, - }; - try sema.storePtr2(block, src, vector_ptr, ptr_src, vector, operand_src, .store); - return; - } - const operand = sema.coerceExtra(block, elem_ty, uncasted_operand, operand_src, .{ .is_ret = is_ret }) catch |err| switch (err) { error.NotCoercible => unreachable, else => |e| return e, @@ -30447,29 +30433,6 @@ fn storePtr2( try sema.requireRuntimeBlock(block, src, runtime_src); - if (ptr_ty.ptrInfo(zcu).flags.vector_index == .runtime) { - const ptr_inst = ptr.toIndex().?; - const air_tags = sema.air_instructions.items(.tag); - if (air_tags[@intFromEnum(ptr_inst)] == .ptr_elem_ptr) { - const ty_pl = sema.air_instructions.items(.data)[@intFromEnum(ptr_inst)].ty_pl; - const bin_op = sema.getTmpAir().extraData(Air.Bin, ty_pl.payload).data; - _ = try block.addInst(.{ - .tag = .vector_store_elem, - .data = .{ .vector_store_elem = .{ - .vector_ptr = bin_op.lhs, - .payload = try block.sema.addExtra(Air.Bin{ - .lhs = bin_op.rhs, - .rhs = operand, - }), - } }, - }); - return; - } - return sema.fail(block, ptr_src, "unable to determine vector element index of type '{f}'", .{ - ptr_ty.fmt(pt), - }); - } - const store_inst = if (is_ret) try block.addBinOp(.store, ptr, operand) else @@ -30569,37 +30532,6 @@ fn markMaybeComptimeAllocRuntime(sema: *Sema, block: *Block, alloc_inst: Air.Ins } } -/// Traverse an arbitrary number of bitcasted pointers and return the underyling vector -/// pointer. Only if the final element type matches the vector element type, and the -/// lengths match. 
-fn obtainBitCastedVectorPtr(sema: *Sema, ptr: Air.Inst.Ref) ?Air.Inst.Ref { - const pt = sema.pt; - const zcu = pt.zcu; - const array_ty = sema.typeOf(ptr).childType(zcu); - if (array_ty.zigTypeTag(zcu) != .array) return null; - var ptr_ref = ptr; - var ptr_inst = ptr_ref.toIndex() orelse return null; - const air_datas = sema.air_instructions.items(.data); - const air_tags = sema.air_instructions.items(.tag); - const vector_ty = while (air_tags[@intFromEnum(ptr_inst)] == .bitcast) { - ptr_ref = air_datas[@intFromEnum(ptr_inst)].ty_op.operand; - if (!sema.isKnownZigType(ptr_ref, .pointer)) return null; - const child_ty = sema.typeOf(ptr_ref).childType(zcu); - if (child_ty.zigTypeTag(zcu) == .vector) break child_ty; - ptr_inst = ptr_ref.toIndex() orelse return null; - } else return null; - - // We have a pointer-to-array and a pointer-to-vector. If the elements and - // lengths match, return the result. - if (array_ty.childType(zcu).eql(vector_ty.childType(zcu), zcu) and - array_ty.arrayLen(zcu) == vector_ty.vectorLen(zcu)) - { - return ptr_ref; - } else { - return null; - } -} - /// Call when you have Value objects rather than Air instructions, and you want to /// assert the store must be done at comptime. fn storePtrVal( @@ -35579,8 +35511,13 @@ fn structFieldInits( const default_val = try sema.resolveConstValue(&block_scope, init_src, coerced, null); if (default_val.canMutateComptimeVarState(zcu)) { - const field_name = struct_type.fieldName(ip, field_i).unwrap().?; - return sema.failWithContainsReferenceToComptimeVar(&block_scope, init_src, field_name, "field default value", default_val); + return sema.failWithContainsReferenceToComptimeVar( + &block_scope, + init_src, + struct_type.fieldName(ip, field_i), + "field default value", + default_val, + ); } struct_type.field_inits.get(ip)[field_i] = default_val.toIntern(); } diff --git a/src/Sema/comptime_ptr_access.zig b/src/Sema/comptime_ptr_access.zig index 9441f8cf72a1..4e101ecd0f96 100644 --- a/src/Sema/comptime_ptr_access.zig +++ b/src/Sema/comptime_ptr_access.zig @@ -24,7 +24,6 @@ pub fn loadComptimePtr(sema: *Sema, block: *Block, src: LazySrcLoc, ptr: Value) const child_bits = Type.fromInterned(ptr_info.child).bitSize(zcu); const bit_offset = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) { .none => 0, - .runtime => return .runtime_load, else => |idx| switch (pt.zcu.getTarget().cpu.arch.endian()) { .little => child_bits * @intFromEnum(idx), .big => host_bits - child_bits * (@intFromEnum(idx) + 1), // element order reversed on big endian @@ -81,7 +80,6 @@ pub fn storeComptimePtr( }; const bit_offset = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) { .none => 0, - .runtime => return .runtime_store, else => |idx| switch (zcu.getTarget().cpu.arch.endian()) { .little => Type.fromInterned(ptr_info.child).bitSize(zcu) * @intFromEnum(idx), .big => host_bits - Type.fromInterned(ptr_info.child).bitSize(zcu) * (@intFromEnum(idx) + 1), // element order reversed on big endian diff --git a/src/Type.zig b/src/Type.zig index f3f5c9949179..b111650e3454 100644 --- a/src/Type.zig +++ b/src/Type.zig @@ -198,9 +198,7 @@ pub fn print(ty: Type, writer: *std.Io.Writer, pt: Zcu.PerThread) std.Io.Writer. 
info.packed_offset.bit_offset, info.packed_offset.host_size, }); } - if (info.flags.vector_index == .runtime) { - try writer.writeAll(":?"); - } else if (info.flags.vector_index != .none) { + if (info.flags.vector_index != .none) { try writer.print(":{d}", .{@intFromEnum(info.flags.vector_index)}); } try writer.writeAll(") "); @@ -3113,7 +3111,7 @@ pub fn enumTagFieldIndex(ty: Type, enum_tag: Value, zcu: *const Zcu) ?u32 { pub fn structFieldName(ty: Type, index: usize, zcu: *const Zcu) InternPool.OptionalNullTerminatedString { const ip = &zcu.intern_pool; return switch (ip.indexToKey(ty.toIntern())) { - .struct_type => ip.loadStructType(ty.toIntern()).fieldName(ip, index), + .struct_type => ip.loadStructType(ty.toIntern()).fieldName(ip, index).toOptional(), .tuple_type => .none, else => unreachable, }; @@ -3558,7 +3556,7 @@ pub fn packedStructFieldPtrInfo( } else .{ switch (zcu.comp.getZigBackend()) { else => (running_bits + 7) / 8, - .stage2_x86_64 => @intCast(struct_ty.abiSize(zcu)), + .stage2_x86_64, .stage2_c => @intCast(struct_ty.abiSize(zcu)), }, bit_offset, }; @@ -3985,7 +3983,7 @@ pub fn elemPtrType(ptr_ty: Type, offset: ?usize, pt: Zcu.PerThread) !Type { break :blk .{ .host_size = @intCast(parent_ty.arrayLen(zcu)), .alignment = parent_ty.abiAlignment(zcu), - .vector_index = if (offset) |some| @enumFromInt(some) else .runtime, + .vector_index = @enumFromInt(offset.?), }; } else .{}; diff --git a/src/Value.zig b/src/Value.zig index b72ee2f78958..9ced6f107473 100644 --- a/src/Value.zig +++ b/src/Value.zig @@ -574,166 +574,37 @@ pub fn writeToPackedMemory( } } -/// Load a Value from the contents of `buffer`. +/// Load a Value from the contents of `buffer`, where `ty` is an unsigned integer type. /// /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past /// the end of the value in memory. 
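// The fast path below discards bits above the integer's width with a shift
// pair: because right shift on u64 is logical, `(val << (64 - bits)) >>
// (64 - bits)` zeroes everything above bit `bits - 1`. A sketch assuming this
// file's `std` import and hypothetical little-endian input bytes:
test "mask an unaligned read down to its bit width" {
    const bits: u16 = 12;
    const bytes = [_]u8{ 0xcd, 0xab };
    const val = std.mem.readVarInt(u64, &bytes, .little); // 0xabcd
    const shift: u6 = @intCast(64 - bits);
    try std.testing.expectEqual(@as(u64, 0xbcd), (val << shift) >> shift);
}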
-pub fn readFromMemory( +pub fn readUintFromMemory( ty: Type, pt: Zcu.PerThread, buffer: []const u8, arena: Allocator, -) error{ - IllDefinedMemoryLayout, - Unimplemented, - OutOfMemory, -}!Value { +) Allocator.Error!Value { const zcu = pt.zcu; - const ip = &zcu.intern_pool; - const target = zcu.getTarget(); - const endian = target.cpu.arch.endian(); - switch (ty.zigTypeTag(zcu)) { - .void => return Value.void, - .bool => { - if (buffer[0] == 0) { - return Value.false; - } else { - return Value.true; - } - }, - .int, .@"enum" => |ty_tag| { - const int_ty = switch (ty_tag) { - .int => ty, - .@"enum" => ty.intTagType(zcu), - else => unreachable, - }; - const int_info = int_ty.intInfo(zcu); - const bits = int_info.bits; - const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8); - if (bits == 0 or buffer.len == 0) return zcu.getCoerced(try zcu.intValue(int_ty, 0), ty); + const endian = zcu.getTarget().cpu.arch.endian(); - if (bits <= 64) switch (int_info.signedness) { // Fast path for integers <= u64 - .signed => { - const val = std.mem.readVarInt(i64, buffer[0..byte_count], endian); - const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); - return zcu.getCoerced(try zcu.intValue(int_ty, result), ty); - }, - .unsigned => { - const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian); - const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); - return zcu.getCoerced(try zcu.intValue(int_ty, result), ty); - }, - } else { // Slow path, we have to construct a big-int - const Limb = std.math.big.Limb; - const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb); - const limbs_buffer = try arena.alloc(Limb, limb_count); - - var bigint = BigIntMutable.init(limbs_buffer, 0); - bigint.readTwosComplement(buffer[0..byte_count], bits, endian, int_info.signedness); - return zcu.getCoerced(try zcu.intValue_big(int_ty, bigint.toConst()), ty); - } - }, - .float => return Value.fromInterned(try pt.intern(.{ .float = .{ - .ty = ty.toIntern(), - .storage = switch (ty.floatBits(target)) { - 16 => .{ .f16 = @bitCast(std.mem.readInt(u16, buffer[0..2], endian)) }, - 32 => .{ .f32 = @bitCast(std.mem.readInt(u32, buffer[0..4], endian)) }, - 64 => .{ .f64 = @bitCast(std.mem.readInt(u64, buffer[0..8], endian)) }, - 80 => .{ .f80 = @bitCast(std.mem.readInt(u80, buffer[0..10], endian)) }, - 128 => .{ .f128 = @bitCast(std.mem.readInt(u128, buffer[0..16], endian)) }, - else => unreachable, - }, - } })), - .array => { - const elem_ty = ty.childType(zcu); - const elem_size = elem_ty.abiSize(zcu); - const elems = try arena.alloc(InternPool.Index, @intCast(ty.arrayLen(zcu))); - var offset: usize = 0; - for (elems) |*elem| { - elem.* = (try readFromMemory(elem_ty, zcu, buffer[offset..], arena)).toIntern(); - offset += @intCast(elem_size); - } - return pt.aggregateValue(ty, elems); - }, - .vector => { - // We use byte_count instead of abi_size here, so that any padding bytes - // follow the data bytes, on both big- and little-endian systems. - const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8; - return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena); - }, - .@"struct" => { - const struct_type = zcu.typeToStruct(ty).?; - switch (struct_type.layout) { - .auto => unreachable, // Sema is supposed to have emitted a compile error already - .@"extern" => { - const field_types = struct_type.field_types; - const field_vals = try arena.alloc(InternPool.Index, field_types.len); - for (field_vals, 0..) 
|*field_val, i| { - const field_ty = Type.fromInterned(field_types.get(ip)[i]); - const off: usize = @intCast(ty.structFieldOffset(i, zcu)); - const sz: usize = @intCast(field_ty.abiSize(zcu)); - field_val.* = (try readFromMemory(field_ty, zcu, buffer[off..(off + sz)], arena)).toIntern(); - } - return pt.aggregateValue(ty, field_vals); - }, - .@"packed" => { - const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8; - return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena); - }, - } - }, - .error_set => { - const bits = zcu.errorSetBits(); - const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8); - const int = std.mem.readVarInt(u64, buffer[0..byte_count], endian); - const index = (int << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); - const name = zcu.global_error_set.keys()[@intCast(index)]; + assert(ty.isUnsignedInt(zcu)); + const bits = ty.intInfo(zcu).bits; + const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8); - return Value.fromInterned(try pt.intern(.{ .err = .{ - .ty = ty.toIntern(), - .name = name, - } })); - }, - .@"union" => switch (ty.containerLayout(zcu)) { - .auto => return error.IllDefinedMemoryLayout, - .@"extern" => { - const union_size = ty.abiSize(zcu); - const array_ty = try zcu.arrayType(.{ .len = union_size, .child = .u8_type }); - const val = (try readFromMemory(array_ty, zcu, buffer, arena)).toIntern(); - return Value.fromInterned(try pt.internUnion(.{ - .ty = ty.toIntern(), - .tag = .none, - .val = val, - })); - }, - .@"packed" => { - const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8; - return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena); - }, - }, - .pointer => { - assert(!ty.isSlice(zcu)); // No well defined layout. - const int_val = try readFromMemory(Type.usize, zcu, buffer, arena); - return Value.fromInterned(try pt.intern(.{ .ptr = .{ - .ty = ty.toIntern(), - .base_addr = .int, - .byte_offset = int_val.toUnsignedInt(zcu), - } })); - }, - .optional => { - assert(ty.isPtrLikeOptional(zcu)); - const child_ty = ty.optionalChild(zcu); - const child_val = try readFromMemory(child_ty, zcu, buffer, arena); - return Value.fromInterned(try pt.intern(.{ .opt = .{ - .ty = ty.toIntern(), - .val = switch (child_val.orderAgainstZero(pt)) { - .lt => unreachable, - .eq => .none, - .gt => child_val.toIntern(), - }, - } })); - }, - else => return error.Unimplemented, + assert(buffer.len >= byte_count); + + if (bits <= 64) { + const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian); + const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits)); + return pt.intValue(ty, result); + } else { + const Limb = std.math.big.Limb; + const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb); + const limbs_buffer = try arena.alloc(Limb, limb_count); + + var bigint: BigIntMutable = .init(limbs_buffer, 0); + bigint.readTwosComplement(buffer[0..byte_count], bits, endian, .unsigned); + return pt.intValue_big(ty, bigint.toConst()); } } diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index 20aaa3d3c258..41b5a32f6e25 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -3512,7 +3512,6 @@ pub fn ptrType(pt: Zcu.PerThread, info: InternPool.Key.PtrType) Allocator.Error! canon_info.packed_offset.host_size = 0; } }, - .runtime => {}, _ => assert(@intFromEnum(info.flags.vector_index) < info.packed_offset.host_size), } @@ -3663,21 +3662,40 @@ pub fn intRef(pt: Zcu.PerThread, ty: Type, x: anytype) Allocator.Error!Air.Inst. 
} pub fn intValue_big(pt: Zcu.PerThread, ty: Type, x: BigIntConst) Allocator.Error!Value { - return Value.fromInterned(try pt.intern(.{ .int = .{ + if (ty.toIntern() != .comptime_int_type) { + const int_info = ty.intInfo(pt.zcu); + assert(x.fitsInTwosComp(int_info.signedness, int_info.bits)); + } + return .fromInterned(try pt.intern(.{ .int = .{ .ty = ty.toIntern(), .storage = .{ .big_int = x }, } })); } pub fn intValue_u64(pt: Zcu.PerThread, ty: Type, x: u64) Allocator.Error!Value { - return Value.fromInterned(try pt.intern(.{ .int = .{ + if (ty.toIntern() != .comptime_int_type and x != 0) { + const int_info = ty.intInfo(pt.zcu); + const unsigned_bits = int_info.bits - @intFromBool(int_info.signedness == .signed); + assert(unsigned_bits >= std.math.log2(x) + 1); + } + return .fromInterned(try pt.intern(.{ .int = .{ .ty = ty.toIntern(), .storage = .{ .u64 = x }, } })); } pub fn intValue_i64(pt: Zcu.PerThread, ty: Type, x: i64) Allocator.Error!Value { - return Value.fromInterned(try pt.intern(.{ .int = .{ + if (ty.toIntern() != .comptime_int_type and x != 0) { + const int_info = ty.intInfo(pt.zcu); + const unsigned_bits = int_info.bits - @intFromBool(int_info.signedness == .signed); + if (x > 0) { + assert(unsigned_bits >= std.math.log2(x) + 1); + } else { + assert(int_info.signedness == .signed); + assert(unsigned_bits >= std.math.log2_int_ceil(u64, @abs(x))); + } + } + return .fromInterned(try pt.intern(.{ .int = .{ .ty = ty.toIntern(), .storage = .{ .i64 = x }, } })); diff --git a/src/codegen/aarch64/Select.zig b/src/codegen/aarch64/Select.zig index 4fe798271fd9..64aeeb7ff48b 100644 --- a/src/codegen/aarch64/Select.zig +++ b/src/codegen/aarch64/Select.zig @@ -134,6 +134,10 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void { var air_inst_index = air_body[air_body_index]; const initial_def_order_len = isel.def_order.count(); air_tag: switch (air_tags[@intFromEnum(air_inst_index)]) { + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .arg, .ret_addr, .frame_addr, @@ -826,18 +830,6 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void { try isel.analyzeUse(un_op); - air_body_index += 1; - air_inst_index = air_body[air_body_index]; - continue :air_tag air_tags[@intFromEnum(air_inst_index)]; - }, - .vector_store_elem => { - const vector_store_elem = air_data[@intFromEnum(air_inst_index)].vector_store_elem; - const bin_op = isel.air.extraData(Air.Bin, vector_store_elem.payload).data; - - try isel.analyzeUse(vector_store_elem.vector_ptr); - try isel.analyzeUse(bin_op.lhs); - try isel.analyzeUse(bin_op.rhs); - air_body_index += 1; air_inst_index = air_body[air_body_index]; continue :air_tag air_tags[@intFromEnum(air_inst_index)]; @@ -962,6 +954,11 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory, }; air_tag: switch (air.next().?) { else => |air_tag| return isel.fail("unimplemented {t}", .{air_tag}), + + // No "scalarize" legalizations are enabled, so these instructions never appear. 
+ .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .arg => { const arg_vi = isel.live_values.fetchRemove(air.inst_index).?.value; defer arg_vi.deref(isel); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 7341a9fd0bdf..e3b33beb146c 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -37,6 +37,7 @@ pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features { .expand_packed_load = true, .expand_packed_store = true, .expand_packed_struct_field_val = true, + .expand_packed_aggregate_init = true, }), }; } @@ -1392,114 +1393,21 @@ pub const DeclGen = struct { try w.writeByte('}'); }, .@"packed" => { - const int_info = ty.intInfo(zcu); - - const bits = Type.smallestUnsignedBits(int_info.bits - 1); - const bit_offset_ty = try pt.intType(.unsigned, bits); - - var bit_offset: u64 = 0; - var eff_num_fields: usize = 0; - - for (0..loaded_struct.field_types.len) |field_index| { - const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - eff_num_fields += 1; - } - - if (eff_num_fields == 0) { - try w.writeByte('('); - try dg.renderUndefValue(w, ty, location); - try w.writeByte(')'); - } else if (ty.bitSize(zcu) > 64) { - // zig_or_u128(zig_or_u128(zig_shl_u128(a, a_off), zig_shl_u128(b, b_off)), zig_shl_u128(c, c_off)) - var num_or = eff_num_fields - 1; - while (num_or > 0) : (num_or -= 1) { - try w.writeAll("zig_or_"); - try dg.renderTypeForBuiltinFnName(w, ty); - try w.writeByte('('); - } - - var eff_index: usize = 0; - var needs_closing_paren = false; - for (0..loaded_struct.field_types.len) |field_index| { - const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - - const field_val = switch (ip.indexToKey(val.toIntern()).aggregate.storage) { - .bytes => |bytes| try pt.intern(.{ .int = .{ - .ty = field_ty.toIntern(), - .storage = .{ .u64 = bytes.at(field_index, ip) }, - } }), - .elems => |elems| elems[field_index], - .repeated_elem => |elem| elem, - }; - const cast_context = IntCastContext{ .value = .{ .value = Value.fromInterned(field_val) } }; - if (bit_offset != 0) { - try w.writeAll("zig_shl_"); - try dg.renderTypeForBuiltinFnName(w, ty); - try w.writeByte('('); - try dg.renderIntCast(w, ty, cast_context, field_ty, .FunctionArgument); - try w.writeAll(", "); - try dg.renderValue(w, try pt.intValue(bit_offset_ty, bit_offset), .FunctionArgument); - try w.writeByte(')'); - } else { - try dg.renderIntCast(w, ty, cast_context, field_ty, .FunctionArgument); - } - - if (needs_closing_paren) try w.writeByte(')'); - if (eff_index != eff_num_fields - 1) try w.writeAll(", "); - - bit_offset += field_ty.bitSize(zcu); - needs_closing_paren = true; - eff_index += 1; - } - } else { - try w.writeByte('('); - // a << a_off | b << b_off | c << c_off - var empty = true; - for (0..loaded_struct.field_types.len) |field_index| { - const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - - if (!empty) try w.writeAll(" | "); - try w.writeByte('('); - try dg.renderCType(w, ctype); - try w.writeByte(')'); - - const field_val = switch (ip.indexToKey(val.toIntern()).aggregate.storage) { - .bytes => |bytes| try pt.intern(.{ .int = .{ - .ty = field_ty.toIntern(), - .storage = .{ .u64 = bytes.at(field_index, ip) }, - } }), - .elems => |elems| elems[field_index], - .repeated_elem => |elem| 
elem, - }; - - const field_int_info: std.builtin.Type.Int = if (field_ty.isAbiInt(zcu)) - field_ty.intInfo(zcu) - else - .{ .signedness = .unsigned, .bits = undefined }; - switch (field_int_info.signedness) { - .signed => { - try w.writeByte('('); - try dg.renderValue(w, Value.fromInterned(field_val), .Other); - try w.writeAll(" & "); - const field_uint_ty = try pt.intType(.unsigned, field_int_info.bits); - try dg.renderValue(w, try field_uint_ty.maxIntScalar(pt, field_uint_ty), .Other); - try w.writeByte(')'); - }, - .unsigned => try dg.renderValue(w, Value.fromInterned(field_val), .Other), - } - if (bit_offset != 0) { - try w.writeAll(" << "); - try dg.renderValue(w, try pt.intValue(bit_offset_ty, bit_offset), .FunctionArgument); - } - - bit_offset += field_ty.bitSize(zcu); - empty = false; - } - try w.writeByte(')'); - } + // https://github.com/ziglang/zig/issues/24657 will eliminate most of the + // following logic, leaving only the recursive `renderValue` call. Once + // that proposal is implemented, a `packed struct` will literally be + // represented in the InternPool by its comptime-known backing integer. + var arena: std.heap.ArenaAllocator = .init(zcu.gpa); + defer arena.deinit(); + const backing_ty: Type = .fromInterned(loaded_struct.backingIntTypeUnordered(ip)); + const buf = try arena.allocator().alloc(u8, @intCast(ty.abiSize(zcu))); + val.writeToMemory(pt, buf) catch |err| switch (err) { + error.IllDefinedMemoryLayout => unreachable, + error.OutOfMemory => |e| return e, + error.ReinterpretDeclRef, error.Unimplemented => return dg.fail("TODO: C backend: lower packed struct value", .{}), + }; + const backing_val: Value = try .readUintFromMemory(backing_ty, pt, buf, arena.allocator()); + return dg.renderValue(w, backing_val, location); }, } }, @@ -1507,33 +1415,38 @@ pub const DeclGen = struct { }, .un => |un| { const loaded_union = ip.loadUnionType(ty.toIntern()); + if (loaded_union.flagsUnordered(ip).layout == .@"packed") { + // https://github.com/ziglang/zig/issues/24657 will eliminate most of the + // following logic, leaving only the recursive `renderValue` call. Once + // that proposal is implemented, a `packed union` will literally be + // represented in the InternPool by its comptime-known backing integer. 
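// (Illustrative, mirroring the packed struct case above with a hypothetical
// type: for `packed struct { a: u3 = 5, b: u5 = 19 }`, `writeToMemory` emits
// the single byte 0x9d == (19 << 3) | 5, and `readUintFromMemory` re-reads it
// as the u8 backing value, which is then rendered as a plain integer literal.)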
+ var arena: std.heap.ArenaAllocator = .init(zcu.gpa); + defer arena.deinit(); + const backing_ty = try ty.unionBackingType(pt); + const buf = try arena.allocator().alloc(u8, @intCast(ty.abiSize(zcu))); + val.writeToMemory(pt, buf) catch |err| switch (err) { + error.IllDefinedMemoryLayout => unreachable, + error.OutOfMemory => |e| return e, + error.ReinterpretDeclRef, error.Unimplemented => return dg.fail("TODO: C backend: lower packed union value", .{}), + }; + const backing_val: Value = try .readUintFromMemory(backing_ty, pt, buf, arena.allocator()); + return dg.renderValue(w, backing_val, location); + } if (un.tag == .none) { const backing_ty = try ty.unionBackingType(pt); - switch (loaded_union.flagsUnordered(ip).layout) { - .@"packed" => { - if (!location.isInitializer()) { - try w.writeByte('('); - try dg.renderType(w, backing_ty); - try w.writeByte(')'); - } - try dg.renderValue(w, Value.fromInterned(un.val), location); - }, - .@"extern" => { - if (location == .StaticInitializer) { - return dg.fail("TODO: C backend: implement extern union backing type rendering in static initializers", .{}); - } - - const ptr_ty = try pt.singleConstPtrType(ty); - try w.writeAll("*(("); - try dg.renderType(w, ptr_ty); - try w.writeAll(")("); - try dg.renderType(w, backing_ty); - try w.writeAll("){"); - try dg.renderValue(w, Value.fromInterned(un.val), location); - try w.writeAll("})"); - }, - else => unreachable, + assert(loaded_union.flagsUnordered(ip).layout == .@"extern"); + if (location == .StaticInitializer) { + return dg.fail("TODO: C backend: implement extern union backing type rendering in static initializers", .{}); } + + const ptr_ty = try pt.singleConstPtrType(ty); + try w.writeAll("*(("); + try dg.renderType(w, ptr_ty); + try w.writeAll(")("); + try dg.renderType(w, backing_ty); + try w.writeAll("){"); + try dg.renderValue(w, Value.fromInterned(un.val), location); + try w.writeAll("})"); } else { if (!location.isInitializer()) { try w.writeByte('('); @@ -1544,21 +1457,6 @@ pub const DeclGen = struct { const field_index = zcu.unionTagFieldIndex(loaded_union, Value.fromInterned(un.tag)).?; const field_ty: Type = .fromInterned(loaded_union.field_types.get(ip)[field_index]); const field_name = loaded_union.loadTagType(ip).names.get(ip)[field_index]; - if (loaded_union.flagsUnordered(ip).layout == .@"packed") { - if (field_ty.hasRuntimeBits(zcu)) { - if (field_ty.isPtrAtRuntime(zcu)) { - try w.writeByte('('); - try dg.renderCType(w, ctype); - try w.writeByte(')'); - } else if (field_ty.zigTypeTag(zcu) == .float) { - try w.writeByte('('); - try dg.renderCType(w, ctype); - try w.writeByte(')'); - } - try dg.renderValue(w, Value.fromInterned(un.val), location); - } else try w.writeByte('0'); - return; - } const has_tag = loaded_union.hasTag(ip); if (has_tag) try w.writeByte('{'); @@ -1745,9 +1643,11 @@ pub const DeclGen = struct { } return w.writeByte('}'); }, - .@"packed" => return w.print("{f}", .{ - try dg.fmtIntLiteralHex(try pt.undefValue(ty), .Other), - }), + .@"packed" => return dg.renderUndefValue( + w, + .fromInterned(loaded_struct.backingIntTypeUnordered(ip)), + location, + ), } }, .tuple_type => |tuple_info| { @@ -1815,9 +1715,11 @@ pub const DeclGen = struct { } if (has_tag) try w.writeByte('}'); }, - .@"packed" => return w.print("{f}", .{ - try dg.fmtIntLiteralHex(try pt.undefValue(ty), .Other), - }), + .@"packed" => return dg.renderUndefValue( + w, + try ty.unionBackingType(pt), + location, + ), } }, .error_union_type => |error_union_type| switch (ctype.info(ctype_pool)) { @@ 
-2445,10 +2347,7 @@ pub const DeclGen = struct { const ty = val.typeOf(zcu); return .{ .data = .{ .dg = dg, - .int_info = if (ty.zigTypeTag(zcu) == .@"union" and ty.containerLayout(zcu) == .@"packed") - .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) } - else - ty.intInfo(zcu), + .int_info = ty.intInfo(zcu), .kind = kind, .ctype = try dg.ctypeFromType(ty, kind), .val = val, @@ -3426,6 +3325,10 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void { // zig fmt: off .inferred_alloc, .inferred_alloc_comptime => unreachable, + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .arg => try airArg(f, inst), .breakpoint => try airBreakpoint(f), @@ -3656,7 +3559,6 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void { .is_named_enum_value => return f.fail("TODO: C backend: implement is_named_enum_value", .{}), .error_set_has_value => return f.fail("TODO: C backend: implement error_set_has_value", .{}), - .vector_store_elem => return f.fail("TODO: C backend: implement vector_store_elem", .{}), .runtime_nav_ptr => try airRuntimeNavPtr(f, inst), @@ -3899,6 +3801,24 @@ fn airAlloc(f: *Function, inst: Air.Inst.Index) !CValue { }); log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local }); try f.allocs.put(zcu.gpa, local.new_local, true); + + switch (elem_ty.zigTypeTag(zcu)) { + .@"struct", .@"union" => switch (elem_ty.containerLayout(zcu)) { + .@"packed" => { + // For packed aggregates, we zero-initialize to try and work around a design flaw + // related to how `packed`, `undefined`, and RLS interact. See comment in `airStore` + // for details. + const w = &f.object.code.writer; + try w.print("memset(&t{d}, 0x00, sizeof(", .{local.new_local}); + try f.renderType(w, elem_ty); + try w.writeAll("));"); + try f.object.newline(); + }, + .auto, .@"extern" => {}, + }, + else => {}, + } + return .{ .local_ref = local.new_local }; } @@ -3918,6 +3838,24 @@ fn airRetPtr(f: *Function, inst: Air.Inst.Index) !CValue { }); log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local }); try f.allocs.put(zcu.gpa, local.new_local, true); + + switch (elem_ty.zigTypeTag(zcu)) { + .@"struct", .@"union" => switch (elem_ty.containerLayout(zcu)) { + .@"packed" => { + // For packed aggregates, we zero-initialize to try and work around a design flaw + // related to how `packed`, `undefined`, and RLS interact. See comment in `airStore` + // for details. + const w = &f.object.code.writer; + try w.print("memset(&t{d}, 0x00, sizeof(", .{local.new_local}); + try f.renderType(w, elem_ty); + try w.writeAll("));"); + try f.object.newline(); + }, + .auto, .@"extern" => {}, + }, + else => {}, + } + return .{ .local_ref = local.new_local }; } @@ -3956,6 +3894,10 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { const ptr_info = ptr_scalar_ty.ptrInfo(zcu); const src_ty: Type = .fromInterned(ptr_info.child); + // `Air.Legalize.Feature.expand_packed_load` should ensure that the only + // bit-pointers we see here are vector element pointers. 
+ assert(ptr_info.packed_offset.host_size == 0 or ptr_info.flags.vector_index != .none); + if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) { try reap(f, inst, &.{ty_op.operand}); return .none; @@ -3987,40 +3929,6 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue { try w.writeAll(", sizeof("); try f.renderType(w, src_ty); try w.writeAll("))"); - } else if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { - const host_bits: u16 = ptr_info.packed_offset.host_size * 8; - const host_ty = try pt.intType(.unsigned, host_bits); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(host_bits - 1)); - const bit_offset_val = try pt.intValue(bit_offset_ty, ptr_info.packed_offset.bit_offset); - - const field_ty = try pt.intType(.unsigned, @as(u16, @intCast(src_ty.bitSize(zcu)))); - - try f.writeCValue(w, local, .Other); - try v.elem(f, w); - try w.writeAll(" = ("); - try f.renderType(w, src_ty); - try w.writeAll(")zig_wrap_"); - try f.object.dg.renderTypeForBuiltinFnName(w, field_ty); - try w.writeAll("(("); - try f.renderType(w, field_ty); - try w.writeByte(')'); - const cant_cast = host_ty.isInt(zcu) and host_ty.bitSize(zcu) > 64; - if (cant_cast) { - if (field_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{}); - try w.writeAll("zig_lo_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - } - try w.writeAll("zig_shr_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - try f.writeCValueDeref(w, operand); - try v.elem(f, w); - try w.print(", {f})", .{try f.fmtIntLiteralDec(bit_offset_val)}); - if (cant_cast) try w.writeByte(')'); - try f.object.dg.renderBuiltinInfo(w, field_ty, .bits); - try w.writeByte(')'); } else { try f.writeCValue(w, local, .Other); try v.elem(f, w); @@ -4213,6 +4121,10 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { const ptr_scalar_ty = ptr_ty.scalarType(zcu); const ptr_info = ptr_scalar_ty.ptrInfo(zcu); + // `Air.Legalize.Feature.expand_packed_store` should ensure that the only + // bit-pointers we see here are vector element pointers. + assert(ptr_info.packed_offset.host_size == 0 or ptr_info.flags.vector_index != .none); + const ptr_val = try f.resolveInst(bin_op.lhs); const src_ty = f.typeOf(bin_op.rhs); @@ -4222,9 +4134,24 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { if (val_is_undef) { try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); if (safety and ptr_info.packed_offset.host_size == 0) { + // If the thing we're initializing is a packed struct/union, we set to 0 instead of + // 0xAA. This is a hack to work around a problem with partially-undefined packed + // aggregates. If we used 0xAA here, then a later initialization through RLS would + // not zero the high padding bits (for a packed type which is not 8/16/32/64/etc bits), + // so we would get a miscompilation. Using 0x00 here avoids this bug in some cases. It + // is *not* a correct fix; for instance it misses any case where packed structs are + // nested in other aggregates. A proper fix for this will involve changing the language, + // such as to remove RLS. This just prevents miscompilations in *some* common cases. 
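// As a concrete instance of the hazard described above (hypothetical type):
// `packed struct { a: u5, b: u7 }` has @bitSizeOf == 12 but @sizeOf == 2, so
// bits 12..15 of the in-memory representation are padding that field stores
// never touch; 0xaa garbage there would survive a later field-by-field (RLS)
// initialization, whereas 0x00 matches what a freshly zeroed backing integer
// would contain.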
+ const byte_str: []const u8 = switch (src_ty.zigTypeTag(zcu)) { + else => "0xaa", + .@"struct", .@"union" => switch (src_ty.containerLayout(zcu)) { + .auto, .@"extern" => "0xaa", + .@"packed" => "0x00", + }, + }; try w.writeAll("memset("); try f.writeCValue(w, ptr_val, .FunctionArgument); - try w.writeAll(", 0xaa, sizeof("); + try w.print(", {s}, sizeof(", .{byte_str}); try f.renderType(w, .fromInterned(ptr_info.child)); try w.writeAll("));"); try f.object.newline(); @@ -4277,66 +4204,6 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { try w.writeByte(';'); try f.object.newline(); try v.end(f, inst, w); - } else if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) { - const host_bits = ptr_info.packed_offset.host_size * 8; - const host_ty = try pt.intType(.unsigned, host_bits); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(host_bits - 1)); - const bit_offset_val = try pt.intValue(bit_offset_ty, ptr_info.packed_offset.bit_offset); - - const src_bits = src_ty.bitSize(zcu); - - const ExpectedContents = [BigInt.Managed.default_capacity]BigIntLimb; - var stack align(@alignOf(ExpectedContents)) = - std.heap.stackFallback(@sizeOf(ExpectedContents), f.object.dg.gpa); - - var mask = try BigInt.Managed.initCapacity(stack.get(), BigInt.calcTwosCompLimbCount(host_bits)); - defer mask.deinit(); - - try mask.setTwosCompIntLimit(.max, .unsigned, @intCast(src_bits)); - try mask.shiftLeft(&mask, ptr_info.packed_offset.bit_offset); - try mask.bitNotWrap(&mask, .unsigned, host_bits); - - const mask_val = try pt.intValue_big(host_ty, mask.toConst()); - - const v = try Vectorize.start(f, inst, w, ptr_ty); - const a = try Assignment.start(f, w, src_scalar_ctype); - try f.writeCValueDeref(w, ptr_val); - try v.elem(f, w); - try a.assign(f, w); - try w.writeAll("zig_or_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeAll("(zig_and_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - try f.writeCValueDeref(w, ptr_val); - try v.elem(f, w); - try w.print(", {f}), zig_shl_", .{try f.fmtIntLiteralHex(mask_val)}); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeByte('('); - const cant_cast = host_ty.isInt(zcu) and host_ty.bitSize(zcu) > 64; - if (cant_cast) { - if (src_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{}); - try w.writeAll("zig_make_"); - try f.object.dg.renderTypeForBuiltinFnName(w, host_ty); - try w.writeAll("(0, "); - } else { - try w.writeByte('('); - try f.renderType(w, host_ty); - try w.writeByte(')'); - } - - if (src_ty.isPtrAtRuntime(zcu)) { - try w.writeByte('('); - try f.renderType(w, .usize); - try w.writeByte(')'); - } - try f.writeCValue(w, src_val, .Other); - try v.elem(f, w); - if (cant_cast) try w.writeByte(')'); - try w.print(", {f}))", .{try f.fmtIntLiteralDec(bit_offset_val)}); - try a.end(f, w); - try v.end(f, inst, w); } else { switch (ptr_val) { .local_ref => |ptr_local_index| switch (src_val) { @@ -6015,10 +5882,7 @@ fn fieldLocation( else if (!field_ptr_ty.childType(zcu).hasRuntimeBitsIgnoreComptime(zcu)) .{ .byte_offset = loaded_struct.offsets.get(ip)[field_index] } else - .{ .field = if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| - .{ .identifier = field_name.toSlice(ip) } - else - .{ .field = field_index } }, + .{ .field = .{ .identifier = loaded_struct.fieldName(ip, field_index).toSlice(ip) } }, .@"packed" => if 
(field_ptr_ty.ptrInfo(zcu).packed_offset.host_size == 0) .{ .byte_offset = @divExact(zcu.structPackedFieldBitOffset(loaded_struct, field_index) + container_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset, 8) } @@ -6202,115 +6066,20 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue { // Ensure complete type definition is visible before accessing fields. _ = try f.ctypeFromType(struct_ty, .complete); + assert(struct_ty.containerLayout(zcu) != .@"packed"); // `Air.Legalize.Feature.expand_packed_struct_field_val` handles this case const field_name: CValue = switch (ip.indexToKey(struct_ty.toIntern())) { - .struct_type => field_name: { - const loaded_struct = ip.loadStructType(struct_ty.toIntern()); - switch (loaded_struct.layout) { - .auto, .@"extern" => break :field_name if (loaded_struct.fieldName(ip, extra.field_index).unwrap()) |field_name| - .{ .identifier = field_name.toSlice(ip) } - else - .{ .field = extra.field_index }, - .@"packed" => { - const int_info = struct_ty.intInfo(zcu); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(int_info.bits - 1)); - - const bit_offset = zcu.structPackedFieldBitOffset(loaded_struct, extra.field_index); - - const field_int_signedness = if (inst_ty.isAbiInt(zcu)) - inst_ty.intInfo(zcu).signedness - else - .unsigned; - const field_int_ty = try pt.intType(field_int_signedness, @as(u16, @intCast(inst_ty.bitSize(zcu)))); - - const temp_local = try f.allocLocal(inst, field_int_ty); - try f.writeCValue(w, temp_local, .Other); - try w.writeAll(" = zig_wrap_"); - try f.object.dg.renderTypeForBuiltinFnName(w, field_int_ty); - try w.writeAll("(("); - try f.renderType(w, field_int_ty); - try w.writeByte(')'); - const cant_cast = int_info.bits > 64; - if (cant_cast) { - if (field_int_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{}); - try w.writeAll("zig_lo_"); - try f.object.dg.renderTypeForBuiltinFnName(w, struct_ty); - try w.writeByte('('); - } - if (bit_offset > 0) { - try w.writeAll("zig_shr_"); - try f.object.dg.renderTypeForBuiltinFnName(w, struct_ty); - try w.writeByte('('); - } - try f.writeCValue(w, struct_byval, .Other); - if (bit_offset > 0) try w.print(", {f})", .{ - try f.fmtIntLiteralDec(try pt.intValue(bit_offset_ty, bit_offset)), - }); - if (cant_cast) try w.writeByte(')'); - try f.object.dg.renderBuiltinInfo(w, field_int_ty, .bits); - try w.writeAll(");"); - try f.object.newline(); - if (inst_ty.eql(field_int_ty, zcu)) return temp_local; - - const local = try f.allocLocal(inst, inst_ty); - if (local.new_local != temp_local.new_local) { - try w.writeAll("memcpy("); - try f.writeCValue(w, .{ .local_ref = local.new_local }, .FunctionArgument); - try w.writeAll(", "); - try f.writeCValue(w, .{ .local_ref = temp_local.new_local }, .FunctionArgument); - try w.writeAll(", sizeof("); - try f.renderType(w, inst_ty); - try w.writeAll("));"); - try f.object.newline(); - } - try freeLocal(f, inst, temp_local.new_local, null); - return local; - }, + .struct_type => .{ .identifier = struct_ty.structFieldName(extra.field_index, zcu).unwrap().?.toSlice(ip) }, + .union_type => name: { + const union_type = ip.loadUnionType(struct_ty.toIntern()); + const enum_tag_ty: Type = .fromInterned(union_type.enum_tag_ty); + const field_name_str = enum_tag_ty.enumFieldName(extra.field_index, zcu).toSlice(ip); + if (union_type.hasTag(ip)) { + break :name .{ .payload_identifier = field_name_str }; + } else { + break :name .{ .identifier = field_name_str }; } }, .tuple_type => .{ 
.field = extra.field_index }, - .union_type => field_name: { - const loaded_union = ip.loadUnionType(struct_ty.toIntern()); - switch (loaded_union.flagsUnordered(ip).layout) { - .auto, .@"extern" => { - const name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index]; - break :field_name if (loaded_union.hasTag(ip)) - .{ .payload_identifier = name.toSlice(ip) } - else - .{ .identifier = name.toSlice(ip) }; - }, - .@"packed" => { - const operand_lval = if (struct_byval == .constant) blk: { - const operand_local = try f.allocLocal(inst, struct_ty); - try f.writeCValue(w, operand_local, .Other); - try w.writeAll(" = "); - try f.writeCValue(w, struct_byval, .Other); - try w.writeByte(';'); - try f.object.newline(); - break :blk operand_local; - } else struct_byval; - const local = try f.allocLocal(inst, inst_ty); - if (switch (local) { - .new_local, .local => |local_index| switch (operand_lval) { - .new_local, .local => |operand_local_index| local_index != operand_local_index, - else => true, - }, - else => true, - }) { - try w.writeAll("memcpy(&"); - try f.writeCValue(w, local, .Other); - try w.writeAll(", &"); - try f.writeCValue(w, operand_lval, .Other); - try w.writeAll(", sizeof("); - try f.renderType(w, inst_ty); - try w.writeAll("));"); - try f.object.newline(); - } - try f.freeCValue(inst, operand_lval); - return local; - }, - } - }, else => unreachable, }; @@ -7702,98 +7471,13 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; const a = try Assignment.start(f, w, try f.ctypeFromType(field_ty, .complete)); - try f.writeCValueMember(w, local, if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| - .{ .identifier = field_name.toSlice(ip) } - else - .{ .field = field_index }); + try f.writeCValueMember(w, local, .{ .identifier = loaded_struct.fieldName(ip, field_index).toSlice(ip) }); try a.assign(f, w); try f.writeCValue(w, resolved_elements[field_index], .Other); try a.end(f, w); } }, - .@"packed" => { - try f.writeCValue(w, local, .Other); - try w.writeAll(" = "); - - const backing_int_ty: Type = .fromInterned(loaded_struct.backingIntTypeUnordered(ip)); - const int_info = backing_int_ty.intInfo(zcu); - - const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(int_info.bits - 1)); - - var bit_offset: u64 = 0; - - var empty = true; - for (0..elements.len) |field_index| { - if (inst_ty.structFieldIsComptime(field_index, zcu)) continue; - const field_ty = inst_ty.fieldType(field_index, zcu); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - - if (!empty) { - try w.writeAll("zig_or_"); - try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty); - try w.writeByte('('); - } - empty = false; - } - empty = true; - for (resolved_elements, 0..) |element, field_index| { - if (inst_ty.structFieldIsComptime(field_index, zcu)) continue; - const field_ty = inst_ty.fieldType(field_index, zcu); - if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - - if (!empty) try w.writeAll(", "); - // TODO: Skip this entire shift if val is 0? 
- try w.writeAll("zig_shlw_"); - try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty); - try w.writeByte('('); - - if (field_ty.isAbiInt(zcu)) { - try w.writeAll("zig_and_"); - try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty); - try w.writeByte('('); - } - - if (inst_ty.isAbiInt(zcu) and (field_ty.isAbiInt(zcu) or field_ty.isPtrAtRuntime(zcu))) { - try f.renderIntCast(w, inst_ty, element, .{}, field_ty, .FunctionArgument); - } else { - try w.writeByte('('); - try f.renderType(w, inst_ty); - try w.writeByte(')'); - if (field_ty.isPtrAtRuntime(zcu)) { - try w.writeByte('('); - try f.renderType(w, switch (int_info.signedness) { - .unsigned => .usize, - .signed => .isize, - }); - try w.writeByte(')'); - } - try f.writeCValue(w, element, .Other); - } - - if (field_ty.isAbiInt(zcu)) { - try w.writeAll(", "); - const field_int_info = field_ty.intInfo(zcu); - const field_mask = if (int_info.signedness == .signed and int_info.bits == field_int_info.bits) - try pt.intValue(backing_int_ty, -1) - else - try (try pt.intType(.unsigned, field_int_info.bits)).maxIntScalar(pt, backing_int_ty); - try f.object.dg.renderValue(w, field_mask, .FunctionArgument); - try w.writeByte(')'); - } - - try w.print(", {f}", .{ - try f.fmtIntLiteralDec(try pt.intValue(bit_offset_ty, bit_offset)), - }); - try f.object.dg.renderBuiltinInfo(w, inst_ty, .bits); - try w.writeByte(')'); - if (!empty) try w.writeByte(')'); - - bit_offset += field_ty.bitSize(zcu); - empty = false; - } - try w.writeByte(';'); - try f.object.newline(); - }, + .@"packed" => unreachable, // `Air.Legalize.Feature.expand_packed_struct_init` handles this case } }, .tuple_type => |tuple_info| for (0..tuple_info.types.len) |field_index| { @@ -7828,9 +7512,10 @@ fn airUnionInit(f: *Function, inst: Air.Inst.Index) !CValue { try reap(f, inst, &.{extra.init}); const w = &f.object.code.writer; - const local = try f.allocLocal(inst, union_ty); if (loaded_union.flagsUnordered(ip).layout == .@"packed") return f.moveCValue(inst, union_ty, payload); + const local = try f.allocLocal(inst, union_ty); + const field: CValue = if (union_ty.unionTagTypeSafety(zcu)) |tag_ty| field: { const layout = union_ty.unionGetLayout(zcu); if (layout.tag_size != 0) { diff --git a/src/codegen/c/Type.zig b/src/codegen/c/Type.zig index fa4db36a0c69..ac535a47d158 100644 --- a/src/codegen/c/Type.zig +++ b/src/codegen/c/Type.zig @@ -2514,11 +2514,7 @@ pub const Pool = struct { kind.noParameter(), ); if (field_ctype.index == .void) continue; - const field_name = if (loaded_struct.fieldName(ip, field_index) - .unwrap()) |field_name| - try pool.string(allocator, field_name.toSlice(ip)) - else - String.fromUnnamed(@intCast(field_index)); + const field_name = try pool.string(allocator, loaded_struct.fieldName(ip, field_index).toSlice(ip)); const field_alignas = AlignAs.fromAlignment(.{ .@"align" = loaded_struct.fieldAlign(ip, field_index), .abi = field_type.abiAlignment(zcu), diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 10b90e25b855..b862a23ddc97 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -2411,8 +2411,7 @@ pub const Object = struct { const field_size = field_ty.abiSize(zcu); const field_align = ty.fieldAlignment(field_index, zcu); const field_offset = ty.structFieldOffset(field_index, zcu); - const field_name = struct_type.fieldName(ip, field_index).unwrap() orelse - try ip.getOrPutStringFmt(gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls); + const field_name = struct_type.fieldName(ip, field_index); fields.appendAssumeCapacity(try 
o.builder.debugMemberType( try o.builder.metadataString(field_name.toSlice(ip)), null, // File @@ -4887,6 +4886,11 @@ pub const FuncGen = struct { const val: Builder.Value = switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off + + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .add => try self.airAdd(inst, .normal), .add_optimized => try self.airAdd(inst, .fast), .add_wrap => try self.airAddWrap(inst), @@ -5093,8 +5097,6 @@ pub const FuncGen = struct { .wasm_memory_size => try self.airWasmMemorySize(inst), .wasm_memory_grow => try self.airWasmMemoryGrow(inst), - .vector_store_elem => try self.airVectorStoreElem(inst), - .runtime_nav_ptr => try self.airRuntimeNavPtr(inst), .inferred_alloc, .inferred_alloc_comptime => unreachable, @@ -6873,16 +6875,14 @@ pub const FuncGen = struct { const array_llvm_ty = try o.lowerType(pt, array_ty); const elem_ty = array_ty.childType(zcu); if (isByRef(array_ty, zcu)) { - const indices: [2]Builder.Value = .{ - try o.builder.intValue(try o.lowerType(pt, Type.usize), 0), rhs, - }; + const elem_ptr = try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &.{ + try o.builder.intValue(try o.lowerType(pt, Type.usize), 0), + rhs, + }, ""); if (isByRef(elem_ty, zcu)) { - const elem_ptr = try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &indices, ""); const elem_alignment = elem_ty.abiAlignment(zcu).toLlvm(); return self.loadByRef(elem_ptr, elem_ty, elem_alignment, .normal); } else { - const elem_ptr = - try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &indices, ""); return self.loadTruncate(.normal, elem_ty, elem_ptr, .default); } } @@ -8140,33 +8140,6 @@ pub const FuncGen = struct { }, ""); } - fn airVectorStoreElem(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { - const o = self.ng.object; - const pt = self.ng.pt; - const zcu = pt.zcu; - const data = self.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem; - const extra = self.air.extraData(Air.Bin, data.payload).data; - - const vector_ptr = try self.resolveInst(data.vector_ptr); - const vector_ptr_ty = self.typeOf(data.vector_ptr); - const index = try self.resolveInst(extra.lhs); - const operand = try self.resolveInst(extra.rhs); - - self.maybeMarkAllowZeroAccess(vector_ptr_ty.ptrInfo(zcu)); - - // TODO: Emitting a load here is a violation of volatile semantics. Not fixable in general. 
- // https://github.com/ziglang/zig/issues/18652#issuecomment-2452844908 - const access_kind: Builder.MemoryAccessKind = - if (vector_ptr_ty.isVolatilePtr(zcu)) .@"volatile" else .normal; - const elem_llvm_ty = try o.lowerType(pt, vector_ptr_ty.childType(zcu)); - const alignment = vector_ptr_ty.ptrAlignment(zcu).toLlvm(); - const loaded = try self.wip.load(access_kind, elem_llvm_ty, vector_ptr, alignment, ""); - - const new_vector = try self.wip.insertElement(loaded, operand, index, ""); - _ = try self.store(vector_ptr, vector_ptr_ty, new_vector, .none); - return .none; - } - fn airRuntimeNavPtr(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const o = fg.ng.object; const pt = fg.ng.pt; @@ -8303,8 +8276,7 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const inst_ty = self.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(zcu); - - if (scalar_ty.isAnyFloat()) return self.todo("saturating float add", .{}); + assert(scalar_ty.zigTypeTag(zcu) == .int); return self.wip.callIntrinsic( .normal, .none, @@ -8344,8 +8316,7 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const inst_ty = self.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(zcu); - - if (scalar_ty.isAnyFloat()) return self.todo("saturating float sub", .{}); + assert(scalar_ty.zigTypeTag(zcu) == .int); return self.wip.callIntrinsic( .normal, .none, @@ -8385,8 +8356,7 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const inst_ty = self.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(zcu); - - if (scalar_ty.isAnyFloat()) return self.todo("saturating float mul", .{}); + assert(scalar_ty.zigTypeTag(zcu) == .int); return self.wip.callIntrinsic( .normal, .none, @@ -11454,7 +11424,6 @@ pub const FuncGen = struct { const access_kind: Builder.MemoryAccessKind = if (info.flags.is_volatile) .@"volatile" else .normal; - assert(info.flags.vector_index != .runtime); if (info.flags.vector_index != .none) { const index_u32 = try o.builder.intValue(.i32, info.flags.vector_index); const vec_elem_ty = try o.lowerType(pt, elem_ty); @@ -11524,7 +11493,6 @@ pub const FuncGen = struct { const access_kind: Builder.MemoryAccessKind = if (info.flags.is_volatile) .@"volatile" else .normal; - assert(info.flags.vector_index != .runtime); if (info.flags.vector_index != .none) { const index_u32 = try o.builder.intValue(.i32, info.flags.vector_index); const vec_elem_ty = try o.lowerType(pt, elem_ty); diff --git a/src/codegen/riscv64/CodeGen.zig b/src/codegen/riscv64/CodeGen.zig index fe40ba4bbb16..cdca3c2fd8bd 100644 --- a/src/codegen/riscv64/CodeGen.zig +++ b/src/codegen/riscv64/CodeGen.zig @@ -1391,6 +1391,11 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { const tag = air_tags[@intFromEnum(inst)]; switch (tag) { // zig fmt: off + + // No "scalarize" legalizations are enabled, so these instructions never appear. 
+ .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .add, .add_wrap, .sub, @@ -1633,7 +1638,6 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .is_named_enum_value => return func.fail("TODO implement is_named_enum_value", .{}), .error_set_has_value => return func.fail("TODO implement error_set_has_value", .{}), - .vector_store_elem => return func.fail("TODO implement vector_store_elem", .{}), .c_va_arg => return func.fail("TODO implement c_va_arg", .{}), .c_va_copy => return func.fail("TODO implement c_va_copy", .{}), diff --git a/src/codegen/sparc64/CodeGen.zig b/src/codegen/sparc64/CodeGen.zig index 48d44e39f973..4cbe07c76279 100644 --- a/src/codegen/sparc64/CodeGen.zig +++ b/src/codegen/sparc64/CodeGen.zig @@ -479,6 +479,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off + + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .ptr_add => try self.airPtrArithmetic(inst, .ptr_add), .ptr_sub => try self.airPtrArithmetic(inst, .ptr_sub), @@ -702,7 +707,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .is_named_enum_value => @panic("TODO implement is_named_enum_value"), .error_set_has_value => @panic("TODO implement error_set_has_value"), - .vector_store_elem => @panic("TODO implement vector_store_elem"), .runtime_nav_ptr => @panic("TODO implement runtime_nav_ptr"), .c_va_arg => return self.fail("TODO implement c_va_arg", .{}), diff --git a/src/codegen/spirv/CodeGen.zig b/src/codegen/spirv/CodeGen.zig index c8956a274b11..281504c7d253 100644 --- a/src/codegen/spirv/CodeGen.zig +++ b/src/codegen/spirv/CodeGen.zig @@ -1520,8 +1520,7 @@ fn resolveType(cg: *CodeGen, ty: Type, repr: Repr) Error!Id { const field_ty: Type = .fromInterned(struct_type.field_types.get(ip)[field_index]); if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; - const field_name = struct_type.fieldName(ip, field_index).unwrap() orelse - try ip.getOrPutStringFmt(zcu.gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls); + const field_name = struct_type.fieldName(ip, field_index); try member_types.append(try cg.resolveType(field_ty, .indirect)); try member_names.append(field_name.toSlice(ip)); try member_offsets.append(@intCast(ty.structFieldOffset(field_index, zcu))); @@ -2726,8 +2725,6 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) Error!void { .ptr_elem_val => try cg.airPtrElemVal(inst), .array_elem_val => try cg.airArrayElemVal(inst), - .vector_store_elem => return cg.airVectorStoreElem(inst), - .set_union_tag => return cg.airSetUnionTag(inst), .get_union_tag => try cg.airGetUnionTag(inst), .union_init => try cg.airUnionInit(inst), @@ -4446,29 +4443,6 @@ fn airPtrElemVal(cg: *CodeGen, inst: Air.Inst.Index) !?Id { return try cg.load(elem_ty, elem_ptr_id, .{ .is_volatile = ptr_ty.isVolatilePtr(zcu) }); } -fn airVectorStoreElem(cg: *CodeGen, inst: Air.Inst.Index) !void { - const zcu = cg.module.zcu; - const data = cg.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem; - const extra = cg.air.extraData(Air.Bin, data.payload).data; - - const vector_ptr_ty = cg.typeOf(data.vector_ptr); - const vector_ty = vector_ptr_ty.childType(zcu); - const scalar_ty = vector_ty.scalarType(zcu); - - const scalar_ty_id = try cg.resolveType(scalar_ty, .indirect); - const 
storage_class = cg.module.storageClass(vector_ptr_ty.ptrAddressSpace(zcu)); - const scalar_ptr_ty_id = try cg.module.ptrType(scalar_ty_id, storage_class); - - const vector_ptr = try cg.resolve(data.vector_ptr); - const index = try cg.resolve(extra.lhs); - const operand = try cg.resolve(extra.rhs); - - const elem_ptr_id = try cg.accessChainId(scalar_ptr_ty_id, vector_ptr, &.{index}); - try cg.store(scalar_ty, elem_ptr_id, operand, .{ - .is_volatile = vector_ptr_ty.isVolatilePtr(zcu), - }); -} - fn airSetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) !void { const zcu = cg.module.zcu; const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig index d8d8933cc3ea..684513bf8281 100644 --- a/src/codegen/wasm/CodeGen.zig +++ b/src/codegen/wasm/CodeGen.zig @@ -1786,6 +1786,10 @@ fn buildPointerOffset(cg: *CodeGen, ptr_value: WValue, offset: u64, action: enum fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { const air_tags = cg.air.instructions.items(.tag); return switch (air_tags[@intFromEnum(inst)]) { + // No "scalarize" legalizations are enabled, so these instructions never appear. + .legalize_vec_elem_val => unreachable, + .legalize_vec_store_elem => unreachable, + .inferred_alloc, .inferred_alloc_comptime => unreachable, .add => cg.airBinOp(inst, .add), @@ -1978,7 +1982,6 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { .save_err_return_trace_index, .is_named_enum_value, .addrspace_cast, - .vector_store_elem, .c_va_arg, .c_va_copy, .c_va_end, diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig index 64340798006d..f0772dcd73c3 100644 --- a/src/codegen/x86_64/CodeGen.zig +++ b/src/codegen/x86_64/CodeGen.zig @@ -854,12 +854,6 @@ const FrameAlloc = struct { } }; -const StackAllocation = struct { - inst: ?Air.Inst.Index, - /// TODO do we need size? 
should be determined by inst.ty.abiSize(zcu) - size: u32, -}; - const BlockData = struct { relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty, state: State, @@ -89326,7 +89320,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { error.SelectFailed => res[0] = try ops[0].load(val_ty, .{ .disp = switch (cg.typeOf(ty_op.operand).ptrInfo(zcu).flags.vector_index) { .none => 0, - .runtime => unreachable, else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), }, }, cg), @@ -89569,7 +89562,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { error.SelectFailed => try ops[0].store(&ops[1], .{ .disp = switch (cg.typeOf(bin_op.lhs).ptrInfo(zcu).flags.vector_index) { .none => 0, - .runtime => unreachable, else => |vector_index| @intCast(cg.typeOf(bin_op.rhs).abiSize(zcu) * @intFromEnum(vector_index)), }, .safe = switch (air_tag) { @@ -103934,7 +103926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ops[0].toOffset(0, cg); try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg); }, - .array_elem_val => { + .array_elem_val, .legalize_vec_elem_val => { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const array_ty = cg.typeOf(bin_op.lhs); const res_ty = array_ty.elemType2(zcu); @@ -171402,8 +171394,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .aggregate_init => |air_tag| fallback: { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const agg_ty = ty_pl.ty.toType(); - if ((agg_ty.isVector(zcu) and agg_ty.childType(zcu).toIntern() == .bool_type) or - (agg_ty.zigTypeTag(zcu) == .@"struct" and agg_ty.containerLayout(zcu) == .@"packed")) break :fallback try cg.airAggregateInit(inst); + if (agg_ty.isVector(zcu) and agg_ty.childType(zcu).toIntern() == .bool_type) { + break :fallback try cg.airAggregateInitBoolVec(inst); + } var res = try cg.tempAllocMem(agg_ty); const reset_index = cg.next_temp_index; var bt = cg.liveness.iterateBigTomb(inst); @@ -171441,10 +171434,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } } }, - .@"packed" => return cg.fail("failed to select {s} {f}", .{ - @tagName(air_tag), - agg_ty.fmt(pt), - }), + .@"packed" => unreachable, } }, .tuple_type => |tuple_type| { @@ -173054,10 +173044,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ert.die(cg); try res.finish(inst, &.{}, &.{}, cg); }, - .vector_store_elem => { - const vector_store_elem = air_datas[@intFromEnum(inst)].vector_store_elem; - const bin_op = cg.air.extraData(Air.Bin, vector_store_elem.payload).data; - var ops = try cg.tempsFromOperands(inst, .{ vector_store_elem.vector_ptr, bin_op.lhs, bin_op.rhs }); + .runtime_nav_ptr => { + const ty_nav = air_datas[@intFromEnum(inst)].ty_nav; + const nav = ip.getNav(ty_nav.nav); + const is_threadlocal = zcu.comp.config.any_non_single_threaded and nav.isThreadlocal(ip); + if (is_threadlocal) if (cg.target.ofmt == .coff or cg.mod.pic) { + try cg.spillRegisters(&.{ .rdi, .rax }); + } else { + try cg.spillRegisters(&.{.rax}); + }; + var res = try cg.tempInit(.fromInterned(ty_nav.ty), .{ .lea_nav = ty_nav.nav }); + if (is_threadlocal) while (try res.toRegClass(true, .general_purpose, cg)) {}; + try res.finish(inst, &.{}, &.{}, cg); + }, + .c_va_arg => try cg.airVaArg(inst), + .c_va_copy => try cg.airVaCopy(inst), + .c_va_end => try cg.airVaEnd(inst), + .c_va_start => try cg.airVaStart(inst), + .legalize_vec_store_elem => { + const pl_op = air_datas[@intFromEnum(inst)].pl_op; + const 
bin = cg.air.extraData(Air.Bin, pl_op.payload).data; + // vector_ptr, index, elem_val + var ops = try cg.tempsFromOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs }); cg.select(&.{}, &.{}, &ops, comptime &.{ .{ .src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool }, .patterns = &.{ @@ -173639,7 +173647,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, } }) catch |err| switch (err) { error.SelectFailed => { - const elem_size = cg.typeOf(bin_op.rhs).abiSize(zcu); + const elem_size = cg.typeOf(bin.rhs).abiSize(zcu); while (try ops[0].toRegClass(true, .general_purpose, cg) or try ops[1].toRegClass(true, .general_purpose, cg)) {} @@ -173681,23 +173689,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }; for (ops) |op| try op.die(cg); }, - .runtime_nav_ptr => { - const ty_nav = air_datas[@intFromEnum(inst)].ty_nav; - const nav = ip.getNav(ty_nav.nav); - const is_threadlocal = zcu.comp.config.any_non_single_threaded and nav.isThreadlocal(ip); - if (is_threadlocal) if (cg.target.ofmt == .coff or cg.mod.pic) { - try cg.spillRegisters(&.{ .rdi, .rax }); - } else { - try cg.spillRegisters(&.{.rax}); - }; - var res = try cg.tempInit(.fromInterned(ty_nav.ty), .{ .lea_nav = ty_nav.nav }); - if (is_threadlocal) while (try res.toRegClass(true, .general_purpose, cg)) {}; - try res.finish(inst, &.{}, &.{}, cg); - }, - .c_va_arg => try cg.airVaArg(inst), - .c_va_copy => try cg.airVaCopy(inst), - .c_va_end => try cg.airVaEnd(inst), - .c_va_start => try cg.airVaStart(inst), .work_item_id, .work_group_size, .work_group_id => unreachable, } try cg.resetTemps(@enumFromInt(0)); @@ -180646,944 +180637,57 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs }); } -fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { +fn airAggregateInitBoolVec(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; + const result_ty = self.typeOfIndex(inst); + const len: usize = @intCast(result_ty.arrayLen(zcu)); const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data; - - const dst_ty = self.typeOfIndex(inst); - const elem_ty = dst_ty.childType(zcu); - const elem_abi_size: u16 = @intCast(elem_ty.abiSize(zcu)); - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - const lhs_ty = self.typeOf(extra.a); - const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); - const rhs_ty = self.typeOf(extra.b); - const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu)); - const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size); - - const ExpectedContents = [32]?i32; - var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = - std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); - const allocator = stack.get(); - - const mask_elems = try allocator.alloc(?i32, extra.mask_len); - defer allocator.free(mask_elems); - for (mask_elems, 0..) |*mask_elem, elem_index| { - const mask_elem_val = - Value.fromInterned(extra.mask).elemValue(pt, elem_index) catch unreachable; - mask_elem.* = if (mask_elem_val.isUndef(zcu)) - null - else - @intCast(mask_elem_val.toSignedInt(zcu)); - } - - const has_avx = self.hasFeature(.avx); - const result = @as(?MCValue, result: { - for (mask_elems) |mask_elem| { - if (mask_elem) |_| break; - } else break :result try self.allocRegOrMem(inst, true); - - for (mask_elems, 0..) 
|mask_elem, elem_index| { - if (mask_elem orelse continue != elem_index) break; - } else { - const lhs_mcv = try self.resolveInst(extra.a); - if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv; - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{}); - break :result dst_mcv; - } - - for (mask_elems, 0..) |mask_elem, elem_index| { - if (~(mask_elem orelse continue) != elem_index) break; - } else { - const rhs_mcv = try self.resolveInst(extra.b); - if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv; - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{}); - break :result dst_mcv; - } - - for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: { - if (elem_abi_size > 8) break :unpck; - if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :unpck; - - var sources: [2]?u1 = @splat(null); - for (mask_elems, 0..) |maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index = - std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck; - const elem_byte = (elem_index >> 1) * elem_abi_size; - if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) { - .unpckl => 0b0000, - .unpckh => 0b1000, - else => unreachable, - }) | (elem_byte << 1 & 0b10000)) break :unpck; - - const source = @intFromBool(mask_elem < 0); - if (sources[elem_index & 0b00001]) |prev_source| { - if (source != prev_source) break :unpck; - } else sources[elem_index & 0b00001] = source; - } - if (sources[0] orelse break :unpck == sources[1] orelse break :unpck) break :unpck; - - const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; - const operand_tys = [2]Type{ lhs_ty, rhs_ty }; - const lhs_mcv = try self.resolveInst(operands[sources[0].?]); - const rhs_mcv = try self.resolveInst(operands[sources[1].?]); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, max_abi_size); - - const mir_tag: Mir.Inst.FixedTag = if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or - (dst_abi_size > 16 and !self.hasFeature(.avx2))) .{ switch (elem_abi_size) { - 4 => if (has_avx) .v_ps else ._ps, - 8 => if (has_avx) .v_pd else ._pd, - else => unreachable, - }, variant } else .{ if (has_avx) .vp_ else .p_, switch (variant) { - .unpckl => switch (elem_abi_size) { - 1 => .unpcklbw, - 2 => .unpcklwd, - 4 => .unpckldq, - 8 => .unpcklqdq, - else => unreachable, - }, - .unpckh => switch (elem_abi_size) { - 1 => .unpckhbw, - 2 => .unpckhwd, - 4 => .unpckhdq, - 8 => .unpckhqdq, - else => unreachable, - }, - else => unreachable, - } }; - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemory( - mir_tag, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? 
- else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - ); - break :result dst_mcv; - } - - pshufd: { - if (elem_abi_size != 4) break :pshufd; - if (max_abi_size > self.vectorSize(.float)) break :pshufd; - - var control: u8 = 0b00_00_00_00; - var sources: [1]?u1 = @splat(null); - for (mask_elems, 0..) |maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); - if (mask_elem_index & 0b100 != elem_index & 0b100) break :pshufd; - - const source = @intFromBool(mask_elem < 0); - if (sources[0]) |prev_source| { - if (source != prev_source) break :pshufd; - } else sources[(elem_index & 0b010) >> 1] = source; - - const select_bit: u3 = @intCast((elem_index & 0b011) << 1); - const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; - if (elem_index & 0b100 == 0) - control |= select_mask - else if (control & @as(u8, 0b11) << select_bit != select_mask) break :pshufd; - } - - const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; - const operand_tys = [2]Type{ lhs_ty, rhs_ty }; - const src_mcv = try self.resolveInst(operands[sources[0] orelse break :pshufd]); - - const dst_reg = if (src_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, src_mcv)) - src_mcv.getReg().? - else - try self.register_manager.allocReg(inst, abi.RegisterClass.sse); - const dst_alias = registerAlias(dst_reg, max_abi_size); - - if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ if (has_avx) .vp_d else .p_d, .shuf }, - dst_alias, - try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterImmediate( - .{ if (has_avx) .vp_d else .p_d, .shuf }, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size), - .u(control), - ); - break :result .{ .register = dst_reg }; - } - - shufps: { - if (elem_abi_size != 4) break :shufps; - if (max_abi_size > self.vectorSize(.float)) break :shufps; - - var control: u8 = 0b00_00_00_00; - var sources: [2]?u1 = @splat(null); - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); - if (mask_elem_index & 0b100 != elem_index & 0b100) break :shufps; - - const source = @intFromBool(mask_elem < 0); - if (sources[(elem_index & 0b010) >> 1]) |prev_source| { - if (source != prev_source) break :shufps; - } else sources[(elem_index & 0b010) >> 1] = source; - - const select_bit: u3 = @intCast((elem_index & 0b011) << 1); - const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; - if (elem_index & 0b100 == 0) - control |= select_mask - else if (control & @as(u8, 0b11) << select_bit != select_mask) break :shufps; - } - if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps; - - const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; - const operand_tys = [2]Type{ lhs_ty, rhs_ty }; - const lhs_mcv = try self.resolveInst(operands[sources[0].?]); - const rhs_mcv = try self.resolveInst(operands[sources[1].?]); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, max_abi_size); - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .v_ps, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .v_ps, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ ._ps, .shuf }, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterImmediate( - .{ ._ps, .shuf }, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ); - break :result dst_mcv; - } - - shufpd: { - if (elem_abi_size != 8) break :shufpd; - if (max_abi_size > self.vectorSize(.float)) break :shufpd; - - var control: u4 = 0b0_0_0_0; - var sources: [2]?u1 = @splat(null); - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); - if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd; - - const source = @intFromBool(mask_elem < 0); - if (sources[elem_index & 0b01]) |prev_source| { - if (source != prev_source) break :shufpd; - } else sources[elem_index & 0b01] = source; - - control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index); - } - if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd; - - const operands: [2]Air.Inst.Ref = .{ extra.a, extra.b }; - const operand_tys: [2]Type = .{ lhs_ty, rhs_ty }; - const lhs_mcv = try self.resolveInst(operands[sources[0].?]); - const rhs_mcv = try self.resolveInst(operands[sources[1].?]); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, max_abi_size); - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .v_pd, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .v_pd, .shuf }, - dst_alias, - registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ ._pd, .shuf }, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), - .u(control), - ) else try self.asmRegisterRegisterImmediate( - .{ ._pd, .shuf }, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - .u(control), - ); - break :result dst_mcv; - } - - blend: { - if (elem_abi_size < 2) break :blend; - if (dst_abi_size > self.vectorSize(.float)) break :blend; - if (!self.hasFeature(.sse4_1)) break :blend; - - var control: u8 = 0b0_0_0_0_0_0_0_0; - for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index = - std.math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend; - if (mask_elem_index != elem_index) break :blend; - - const select_mask = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index); - if (elem_index & 0b1000 == 0) - control |= select_mask - else if (control & @as(u8, 0b1) << @truncate(elem_index) != select_mask) break :blend; - } - - if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: { - const expanded_control = switch (elem_abi_size) { - 4 => control, - 8 => @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) | - @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00), - else => break :vpblendd, - }; - - const lhs_mcv = try self.resolveInst(extra.a); - const lhs_reg = if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, lhs_mcv); - const lhs_lock = self.register_manager.lockReg(lhs_reg); - defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - - const rhs_mcv = try self.resolveInst(extra.b); - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse); - if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_d, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(lhs_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .vp_d, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(lhs_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ); - break :result .{ .register = dst_reg }; - } - - if (!elem_ty.isRuntimeFloat() or elem_abi_size == 2) pblendw: { - const expanded_control = switch (elem_abi_size) { - 2 => control, - 4 => if (dst_abi_size <= 16 or - @as(u4, @intCast(control >> 4)) == @as(u4, @truncate(control >> 0))) - @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) | - @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) | - @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00) - else - break :pblendw, - 8 => if (dst_abi_size <= 16 or - @as(u2, @intCast(control >> 2)) == @as(u2, @truncate(control >> 0))) - @as(u8, if (control & 0b01 != 0) 0b0000_1111 else 0b0000_0000) | - @as(u8, if (control & 0b10 != 0) 0b1111_0000 else 0b0000_0000) - else - break :pblendw, - 16 => break :pblendw, - else => unreachable, - }; - - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, extra.a, 0, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? 
- else - dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterRegisterImmediate( - .{ .vp_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - .{ .p_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterImmediate( - .{ .p_w, .blend }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ); - break :result .{ .register = dst_reg }; - } - - const expanded_control = switch (elem_abi_size) { - 4, 8 => control, - 16 => @as(u4, if (control & 0b01 != 0) 0b00_11 else 0b00_00) | - @as(u4, if (control & 0b10 != 0) 0b11_00 else 0b00_00), - else => unreachable, - }; - - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, extra.a, 0, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - switch (elem_abi_size) { - 4 => .{ .v_ps, .blend }, - 8, 16 => .{ .v_pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterRegisterImmediate( - switch (elem_abi_size) { - 4 => .{ .v_ps, .blend }, - 8, 16 => .{ .v_pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( - switch (elem_abi_size) { - 4 => .{ ._ps, .blend }, - 8, 16 => .{ ._pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - .u(expanded_control), - ) else try self.asmRegisterRegisterImmediate( - switch (elem_abi_size) { - 4 => .{ ._ps, .blend }, - 8, 16 => .{ ._pd, .blend }, - else => unreachable, - }, - registerAlias(dst_reg, dst_abi_size), - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? 
- else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - .u(expanded_control), - ); - break :result .{ .register = dst_reg }; - } - - blendv: { - if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :blendv; - - const select_mask_elem_ty = try pt.intType(.unsigned, elem_abi_size * 8); - const select_mask_ty = try pt.vectorType(.{ - .len = @intCast(mask_elems.len), - .child = select_mask_elem_ty.toIntern(), - }); - var select_mask_elems: [32]InternPool.Index = undefined; - for ( - select_mask_elems[0..mask_elems.len], - mask_elems, - 0.., - ) |*select_mask_elem, maybe_mask_elem, elem_index| { - const mask_elem = maybe_mask_elem orelse continue; - const mask_elem_index = - std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv; - if (mask_elem_index != elem_index) break :blendv; - - select_mask_elem.* = (if (mask_elem < 0) - try select_mask_elem_ty.maxIntScalar(pt, select_mask_elem_ty) - else - try select_mask_elem_ty.minIntScalar(pt, select_mask_elem_ty)).toIntern(); - } - const select_mask_mcv = try self.lowerValue( - try pt.aggregateValue(select_mask_ty, select_mask_elems[0..mask_elems.len]), - ); + const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra.items[ty_pl.payload..][0..len]); - if (self.hasFeature(.sse4_1)) { - const mir_tag: Mir.Inst.FixedTag = .{ - if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or - (dst_abi_size > 16 and !self.hasFeature(.avx2))) switch (elem_abi_size) { - 4 => if (has_avx) .v_ps else ._ps, - 8 => if (has_avx) .v_pd else ._pd, - else => unreachable, - } else if (has_avx) .vp_b else .p_b, - .blendv, - }; + assert(result_ty.zigTypeTag(zcu) == .vector); + assert(result_ty.childType(zcu).toIntern() == .bool_type); - const select_mask_reg = if (!has_avx) reg: { - try self.register_manager.getKnownReg(.xmm0, null); - try self.genSetReg(.xmm0, select_mask_elem_ty, select_mask_mcv, .{}); - break :reg .xmm0; - } else try self.copyToTmpRegister(select_mask_ty, select_mask_mcv); - const select_mask_alias = registerAlias(select_mask_reg, dst_abi_size); - const select_mask_lock = self.register_manager.lockRegAssumeUnused(select_mask_reg); - defer self.register_manager.unlockReg(select_mask_lock); - - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); - - const dst_mcv: MCValue = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, extra.a, 0, lhs_mcv)) - lhs_mcv - else if (has_avx and lhs_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, dst_abi_size); + const result_size = result_ty.abiSize(zcu); + if (result_size > 8) return self.fail("TODO airAggregateInitBoolVec over 8 bytes", .{}); - if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister( - mir_tag, - dst_alias, - if (lhs_mcv.isRegister()) - registerAlias(lhs_mcv.getReg().?, dst_abi_size) - else - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - select_mask_alias, - ) else try self.asmRegisterRegisterRegisterRegister( - mir_tag, - dst_alias, - if (lhs_mcv.isRegister()) - registerAlias(lhs_mcv.getReg().?, dst_abi_size) - else - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? 
- else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - select_mask_alias, - ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryRegister( - mir_tag, - dst_alias, - try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - select_mask_alias, - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_alias, - registerAlias(if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - select_mask_alias, - ); - break :result dst_mcv; - } + const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const lhs_mcv = try self.resolveInst(extra.a); - const rhs_mcv = try self.resolveInst(extra.b); + { + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); - const dst_mcv: MCValue = if (rhs_mcv.isRegister() and - self.reuseOperand(inst, extra.b, 1, rhs_mcv)) - rhs_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, dst_abi_size); + try self.spillEflagsIfOccupied(); + try self.asmRegisterRegister( + .{ ._, .xor }, + registerAlias(dst_reg, @min(result_size, 4)), + registerAlias(dst_reg, @min(result_size, 4)), + ); - const mask_reg = try self.copyToTmpRegister(select_mask_ty, select_mask_mcv); - const mask_alias = registerAlias(mask_reg, dst_abi_size); - const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg); - defer self.register_manager.unlockReg(mask_lock); + for (elements, 0..) |elem, elem_i| { + const elem_reg = try self.copyToTmpRegister(.bool, .{ .air_ref = elem }); + const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg); + defer self.register_manager.unlockReg(elem_lock); - const mir_fixes: Mir.Inst.Fixes = if (elem_ty.isRuntimeFloat()) - switch (elem_ty.floatBits(self.target)) { - 16, 80, 128 => .p_, - 32 => ._ps, - 64 => ._pd, - else => unreachable, - } - else - .p_; - try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias); - if (lhs_mcv.isBase()) try self.asmRegisterMemory( - .{ mir_fixes, .andn }, - mask_alias, - try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), - ) else try self.asmRegisterRegister( - .{ mir_fixes, .andn }, - mask_alias, - if (lhs_mcv.isRegister()) - lhs_mcv.getReg().? - else - try self.copyToTmpRegister(dst_ty, lhs_mcv), + try self.asmRegisterImmediate( + .{ ._, .@"and" }, + registerAlias(elem_reg, @min(result_size, 4)), + .u(1), ); - try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias); - break :result dst_mcv; - } - - pshufb: { - if (max_abi_size > 16) break :pshufb; - if (!self.hasFeature(.ssse3)) break :pshufb; - - const temp_regs = - try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse); - const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs); - defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); - - const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size); - try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{}); - - const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size); - try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{}); - - var lhs_mask_elems: [16]InternPool.Index = undefined; - for (lhs_mask_elems[0..max_abi_size], 0..) 
|*lhs_mask_elem, byte_index| { - const elem_index = byte_index / elem_abi_size; - lhs_mask_elem.* = (try pt.intValue(.u8, if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { - const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; - if (mask_elem < 0) break :elem 0b1_00_00000; - const mask_elem_index: u31 = @intCast(mask_elem); - const byte_off: u32 = @intCast(byte_index % elem_abi_size); - break :elem mask_elem_index * elem_abi_size + byte_off; - })).toIntern(); - } - const lhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type }); - const lhs_mask_mcv = try self.lowerValue( - try pt.aggregateValue(lhs_mask_ty, lhs_mask_elems[0..max_abi_size]), + if (elem_i > 0) try self.asmRegisterImmediate( + .{ ._l, .sh }, + registerAlias(elem_reg, @intCast(result_size)), + .u(@intCast(elem_i)), ); - const lhs_mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(.usize, lhs_mask_mcv.address()) }, - .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, - }; - if (has_avx) try self.asmRegisterRegisterMemory( - .{ .vp_b, .shuf }, - lhs_temp_alias, - lhs_temp_alias, - lhs_mask_mem, - ) else try self.asmRegisterMemory( - .{ .p_b, .shuf }, - lhs_temp_alias, - lhs_mask_mem, + try self.asmRegisterRegister( + .{ ._, .@"or" }, + registerAlias(dst_reg, @intCast(result_size)), + registerAlias(elem_reg, @intCast(result_size)), ); - - var rhs_mask_elems: [16]InternPool.Index = undefined; - for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| { - const elem_index = byte_index / elem_abi_size; - rhs_mask_elem.* = (try pt.intValue(.u8, if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { - const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; - if (mask_elem >= 0) break :elem 0b1_00_00000; - const mask_elem_index: u31 = @intCast(~mask_elem); - const byte_off: u32 = @intCast(byte_index % elem_abi_size); - break :elem mask_elem_index * elem_abi_size + byte_off; - })).toIntern(); - } - const rhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type }); - const rhs_mask_mcv = try self.lowerValue( - try pt.aggregateValue(rhs_mask_ty, rhs_mask_elems[0..max_abi_size]), - ); - const rhs_mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(.usize, rhs_mask_mcv.address()) }, - .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, - }; - if (has_avx) try self.asmRegisterRegisterMemory( - .{ .vp_b, .shuf }, - rhs_temp_alias, - rhs_temp_alias, - rhs_mask_mem, - ) else try self.asmRegisterMemory( - .{ .p_b, .shuf }, - rhs_temp_alias, - rhs_mask_mem, - ); - - if (has_avx) try self.asmRegisterRegisterRegister( - .{ switch (elem_ty.zigTypeTag(zcu)) { - else => break :result null, - .int => .vp_, - .float => switch (elem_ty.floatBits(self.target)) { - 32 => .v_ps, - 64 => .v_pd, - 16, 80, 128 => break :result null, - else => unreachable, - }, - }, .@"or" }, - lhs_temp_alias, - lhs_temp_alias, - rhs_temp_alias, - ) else try self.asmRegisterRegister( - .{ switch (elem_ty.zigTypeTag(zcu)) { - else => break :result null, - .int => .p_, - .float => switch (elem_ty.floatBits(self.target)) { - 32 => ._ps, - 64 => ._pd, - 16, 80, 128 => break :result null, - else => unreachable, - }, - }, .@"or" }, - lhs_temp_alias, - rhs_temp_alias, - ); - break :result .{ .register = temp_regs[0] }; } + } - break :result null; - }) orelse return self.fail("TODO implement airShuffle from {f} and {f} to {f} with {f}", .{ - lhs_ty.fmt(pt), - rhs_ty.fmt(pt), - dst_ty.fmt(pt), - 
Value.fromInterned(extra.mask).fmtValue(pt), - }); - return self.finishAir(inst, result, .{ extra.a, extra.b, .none }); -} - -fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const result_ty = self.typeOfIndex(inst); - const len: usize = @intCast(result_ty.arrayLen(zcu)); - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra.items[ty_pl.payload..][0..len]); - const result: MCValue = result: { - switch (result_ty.zigTypeTag(zcu)) { - .@"struct" => { - if (result_ty.containerLayout(zcu) == .@"packed") return self.fail( - "TODO implement airAggregateInit for {f}", - .{result_ty.fmt(pt)}, - ); - const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); - const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern()); - try self.genInlineMemset( - .{ .lea_frame = .{ .index = frame_index } }, - .{ .immediate = 0 }, - .{ .immediate = result_ty.abiSize(zcu) }, - .{}, - ); - for (elements, 0..) |elem, elem_i_usize| { - const elem_i: u32 = @intCast(elem_i_usize); - if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue; - - const elem_ty = result_ty.fieldType(elem_i, zcu); - const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu)); - if (elem_bit_size > 64) { - return self.fail( - "TODO airAggregateInit implement packed structs with large fields", - .{}, - ); - } - const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); - const elem_abi_bits = elem_abi_size * 8; - const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i); - const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size); - const elem_bit_off = elem_off % elem_abi_bits; - const elem_mcv = try self.resolveInst(elem); - const elem_lock = switch (elem_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - .immediate => |imm| lock: { - if (imm == 0) continue; - break :lock null; - }, - else => null, - }; - defer if (elem_lock) |lock| self.register_manager.unlockReg(lock); - - const elem_extra_bits = self.regExtraBits(elem_ty); - { - const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); - const temp_alias = registerAlias(temp_reg, elem_abi_size); - const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); - defer self.register_manager.unlockReg(temp_lock); - - if (elem_bit_off < elem_extra_bits) { - try self.truncateRegister(elem_ty, temp_alias); - } - if (elem_bit_off > 0) try self.genShiftBinOpMir( - .{ ._l, .sh }, - elem_ty, - .{ .register = temp_alias }, - .u8, - .{ .immediate = elem_bit_off }, - ); - try self.genBinOpMir( - .{ ._, .@"or" }, - elem_ty, - .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, - .{ .register = temp_alias }, - ); - } - if (elem_bit_off > elem_extra_bits) { - const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); - const temp_alias = registerAlias(temp_reg, elem_abi_size); - const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); - defer self.register_manager.unlockReg(temp_lock); - - if (elem_extra_bits > 0) { - try self.truncateRegister(elem_ty, temp_alias); - } - try self.genShiftBinOpMir( - .{ ._r, .sh }, - elem_ty, - .{ .register = temp_reg }, - .u8, - .{ .immediate = elem_abi_bits - elem_bit_off }, - ); - try self.genBinOpMir( - .{ ._, .@"or" }, - elem_ty, - .{ .load_frame = .{ - .index = frame_index, - .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)), - } }, - .{ .register = temp_alias }, - ); 
-                    }
-                }
-                break :result .{ .load_frame = .{ .index = frame_index } };
-            },
-            .vector => {
-                const elem_ty = result_ty.childType(zcu);
-                if (elem_ty.toIntern() != .bool_type) return self.fail(
-                    "TODO implement airAggregateInit for {f}",
-                    .{result_ty.fmt(pt)},
-                );
-                const result_size: u32 = @intCast(result_ty.abiSize(zcu));
-                const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
-                const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
-                defer self.register_manager.unlockReg(dst_lock);
-                try self.asmRegisterRegister(
-                    .{ ._, .xor },
-                    registerAlias(dst_reg, @min(result_size, 4)),
-                    registerAlias(dst_reg, @min(result_size, 4)),
-                );
-
-                for (elements, 0..) |elem, elem_i| {
-                    const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
-                    const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
-                    defer self.register_manager.unlockReg(elem_lock);
-
-                    try self.asmRegisterImmediate(
-                        .{ ._, .@"and" },
-                        registerAlias(elem_reg, @min(result_size, 4)),
-                        .u(1),
-                    );
-                    if (elem_i > 0) try self.asmRegisterImmediate(
-                        .{ ._l, .sh },
-                        registerAlias(elem_reg, result_size),
-                        .u(@intCast(elem_i)),
-                    );
-                    try self.asmRegisterRegister(
-                        .{ ._, .@"or" },
-                        registerAlias(dst_reg, result_size),
-                        registerAlias(elem_reg, result_size),
-                    );
-                }
-                break :result .{ .register = dst_reg };
-            },
-            else => unreachable,
-        }
-    };
+    const result: MCValue = .{ .register = dst_reg };
 
     if (elements.len <= Air.Liveness.bpi - 1) {
         var buf: [Air.Liveness.bpi - 1]Air.Inst.Ref = @splat(.none);
@@ -182269,15 +181373,6 @@ fn fail(cg: *CodeGen, comptime format: []const u8, args: anytype) error{ OutOfMe
     };
 }
 
-fn failMsg(cg: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } {
-    @branchHint(.cold);
-    const zcu = cg.pt.zcu;
-    return switch (cg.owner) {
-        .nav_index => |i| zcu.codegenFailMsg(i, msg),
-        .lazy_sym => |s| zcu.codegenFailTypeMsg(s.ty, msg),
-    };
-}
-
 fn parseRegName(name: []const u8) ?Register {
     if (std.mem.startsWith(u8, name, "db")) return @enumFromInt(
         @intFromEnum(Register.dr0) + (std.fmt.parseInt(u4, name["db".len..], 0) catch return null),
@@ -188819,7 +187914,6 @@ const Select = struct {
             const ptr_info = ty.ptrInfo(zcu);
             return switch (ptr_info.flags.vector_index) {
                 .none => false,
-                .runtime => unreachable,
                 else => ptr_info.child == .bool_type,
            };
         },
@@ -188827,7 +187921,6 @@ const Select = struct {
             const ptr_info = ty.ptrInfo(zcu);
             return switch (ptr_info.flags.vector_index) {
                 .none => false,
-                .runtime => unreachable,
                 else => ptr_info.child == .bool_type and size.bitSize(cg.target) >= ptr_info.packed_offset.host_size,
             };
         },
@@ -190814,7 +189907,7 @@ const Select = struct {
                 .src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
                     Select.Operand.Ref.src1.valueOf(s).immediate),
                 .vector_index => switch (op.flags.base.ref.typeOf(s).ptrInfo(s.cg.pt.zcu).flags.vector_index) {
-                    .none, .runtime => unreachable,
+                    .none => unreachable,
                     else => |vector_index| @intFromEnum(vector_index),
                 },
                 .src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate),
diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig
index e03517f97a2c..e8fea3c988c7 100644
--- a/src/link/Dwarf.zig
+++ b/src/link/Dwarf.zig
@@ -3158,11 +3158,7 @@ fn updateComptimeNavInner(dwarf: *Dwarf, pt: Zcu.PerThread, nav_index: InternPoo
                     .struct_field
                 else
                     .struct_field);
-                if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
-                    var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
-                    const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
-                    try wip_nav.strp(field_name);
-                }
+                try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
                 try wip_nav.refType(field_type);
                 if (!is_comptime) {
                     try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]);
@@ -3187,7 +3183,7 @@ fn updateComptimeNavInner(dwarf: *Dwarf, pt: Zcu.PerThread, nav_index: InternPoo
                 var field_bit_offset: u16 = 0;
                 for (0..loaded_struct.field_types.len) |field_index| {
                     try wip_nav.abbrevCode(.packed_struct_field);
-                    try wip_nav.strp(loaded_struct.fieldName(ip, field_index).unwrap().?.toSlice(ip));
+                    try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
                     const field_type: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                     try wip_nav.refType(field_type);
                     try diw.writeUleb128(field_bit_offset);
@@ -4269,11 +4265,7 @@ fn updateLazyValue(
                     .comptime_value_field_runtime_bits
                 else
                     continue);
-                if (loaded_struct_type.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
-                    var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
-                    const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
-                    try wip_nav.strp(field_name);
-                }
+                try wip_nav.strp(loaded_struct_type.fieldName(ip, field_index).toSlice(ip));
                 const field_value: Value = .fromInterned(switch (aggregate.storage) {
                     .bytes => unreachable,
                     .elems => |elems| elems[field_index],
@@ -4467,11 +4459,7 @@ fn updateContainerTypeWriterError(
                     .struct_field
                 else
                     .struct_field);
-                if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
-                    var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
-                    const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
-                    try wip_nav.strp(field_name);
-                }
+                try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
                 try wip_nav.refType(field_type);
                 if (!is_comptime) {
                     try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]);
@@ -4573,11 +4561,7 @@ fn updateContainerTypeWriterError(
                     .struct_field
                 else
                     .struct_field);
-                if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
-                    var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
-                    const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
-                    try wip_nav.strp(field_name);
-                }
+                try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
                 try wip_nav.refType(field_type);
                 if (!is_comptime) {
                     try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]);
@@ -4600,7 +4584,7 @@ fn updateContainerTypeWriterError(
                 var field_bit_offset: u16 = 0;
                 for (0..loaded_struct.field_types.len) |field_index| {
                     try wip_nav.abbrevCode(.packed_struct_field);
-                    try wip_nav.strp(loaded_struct.fieldName(ip, field_index).unwrap().?.toSlice(ip));
+                    try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
                     const field_type: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
                     try wip_nav.refType(field_type);
                     try diw.writeUleb128(field_bit_offset);
diff --git a/stage1/zig.h b/stage1/zig.h
index 5253912490ce..baae5926101b 100644
--- a/stage1/zig.h
+++ b/stage1/zig.h
@@ -40,6 +40,8 @@
 #elif defined(__mips__)
 #define zig_mips32
 #define zig_mips
+#elif defined(__or1k__)
+#define zig_or1k
 #elif defined(__powerpc64__)
 #define zig_powerpc64
 #define zig_powerpc
@@ -72,6 +74,9 @@
 #elif defined (__x86_64__) || (defined(zig_msvc) && defined(_M_X64))
 #define zig_x86_64
 #define zig_x86
+#elif defined(__I86__)
+#define zig_x86_16
+#define zig_x86
 #endif
 
 #if defined(zig_msvc) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -82,9 +87,7 @@
 #define zig_big_endian 1
 #endif
 
-#if defined(_AIX)
-#define zig_aix
-#elif defined(__MACH__)
+#if defined(__MACH__)
 #define zig_darwin
 #elif defined(__DragonFly__)
 #define zig_dragonfly
@@ -114,20 +117,14 @@
 #define zig_wasi
 #elif defined(_WIN32)
 #define zig_windows
-#elif defined(__MVS__)
-#define zig_zos
 #endif
 
 #if defined(zig_windows)
 #define zig_coff
 #elif defined(__ELF__)
 #define zig_elf
-#elif defined(zig_zos)
-#define zig_goff
 #elif defined(zig_darwin)
 #define zig_macho
-#elif defined(zig_aix)
-#define zig_xcoff
 #endif
 
 #define zig_concat(lhs, rhs) lhs##rhs
@@ -390,12 +387,16 @@
 #define zig_trap() __asm__ volatile(".word 0x0")
 #elif defined(zig_mips)
 #define zig_trap() __asm__ volatile(".word 0x3d")
+#elif defined(zig_or1k)
+#define zig_trap() __asm__ volatile("l.cust8")
 #elif defined(zig_riscv)
 #define zig_trap() __asm__ volatile("unimp")
 #elif defined(zig_s390x)
 #define zig_trap() __asm__ volatile("j 0x2")
 #elif defined(zig_sparc)
 #define zig_trap() __asm__ volatile("illtrap")
+#elif defined(zig_x86_16)
+#define zig_trap() __asm__ volatile("int $0x3")
 #elif defined(zig_x86)
 #define zig_trap() __asm__ volatile("ud2")
 #else
@@ -422,6 +423,8 @@
 #define zig_breakpoint() __asm__ volatile("break 0x0")
 #elif defined(zig_mips)
 #define zig_breakpoint() __asm__ volatile("break")
+#elif defined(zig_or1k)
+#define zig_breakpoint() __asm__ volatile("l.trap 0x0")
 #elif defined(zig_powerpc)
 #define zig_breakpoint() __asm__ volatile("trap")
 #elif defined(zig_riscv)
@@ -804,15 +807,13 @@ static inline bool zig_addo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8
 #endif
 }
 
-zig_extern int32_t __addosi4(int32_t lhs, int32_t rhs, int *overflow);
 static inline bool zig_addo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) {
 #if zig_has_builtin(add_overflow) || defined(zig_gcc)
     int32_t full_res;
     bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int32_t full_res = __addosi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int32_t full_res = (int32_t)((uint32_t)lhs + (uint32_t)rhs);
+    bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
 #endif
     *res = zig_wrap_i32(full_res, bits);
     return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits);
@@ -830,15 +831,13 @@ static inline bool zig_addo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8
 #endif
 }
 
-zig_extern int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow);
 static inline bool zig_addo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) {
 #if zig_has_builtin(add_overflow) || defined(zig_gcc)
     int64_t full_res;
     bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int64_t full_res = __addodi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int64_t full_res = (int64_t)((uint64_t)lhs + (uint64_t)rhs);
+    bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
 #endif
     *res = zig_wrap_i64(full_res, bits);
     return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits);
@@ -912,15 +911,13 @@ static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8
 #endif
 }
 
-zig_extern int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow);
 static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow) || defined(zig_gcc)
     int32_t full_res;
     bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int32_t full_res = __subosi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int32_t full_res = (int32_t)((uint32_t)lhs - (uint32_t)rhs);
+    bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
 #endif
     *res = zig_wrap_i32(full_res, bits);
     return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits);
@@ -938,15 +935,13 @@ static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8
 #endif
 }
 
-zig_extern int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow);
 static inline bool zig_subo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow) || defined(zig_gcc)
     int64_t full_res;
     bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int64_t full_res = __subodi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int64_t full_res = (int64_t)((uint64_t)lhs - (uint64_t)rhs);
+    bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
 #endif
     *res = zig_wrap_i64(full_res, bits);
     return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits);
@@ -1750,15 +1745,13 @@ static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint
 #endif
 }
 
-zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, int *overflow);
 static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) {
 #if zig_has_builtin(add_overflow)
     zig_i128 full_res;
     bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    zig_i128 full_res = (zig_i128)((zig_u128)lhs + (zig_u128)rhs);
+    bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
 #endif
     *res = zig_wrap_i128(full_res, bits);
     return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
@@ -1776,15 +1769,13 @@ static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint
 #endif
 }
 
-zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, int *overflow);
 static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow)
     zig_i128 full_res;
    bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    zig_i128 full_res = (zig_i128)((zig_u128)lhs - (zig_u128)rhs);
+    bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
 #endif
     *res = zig_wrap_i128(full_res, bits);
     return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
@@ -4213,7 +4204,7 @@ static inline void zig_loongarch_cpucfg(uint32_t word, uint32_t* result) {
 #endif
 }
 
-#elif defined(zig_x86)
+#elif defined(zig_x86) && !defined(zig_x86_16)
 
 static inline void zig_x86_cpuid(uint32_t leaf_id, uint32_t subid, uint32_t* eax, uint32_t* ebx, uint32_t* ecx, uint32_t* edx) {
 #if defined(zig_msvc)
diff --git a/test/behavior/union.zig b/test/behavior/union.zig
index 27663feeb670..115c43fbd803 100644
--- a/test/behavior/union.zig
+++ b/test/behavior/union.zig
@@ -218,10 +218,13 @@ test "union with specified enum tag" {
 }
 
 test "packed union generates correctly aligned type" {
+    // This test will be removed after the following accepted proposal is implemented:
+    // https://github.com/ziglang/zig/issues/24657
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
 
     const U = packed union {
         f1: *const fn () error{TestUnexpectedResult}!void,
@@ -1544,7 +1547,7 @@ test "packed union field pointer has correct alignment" {
 
     const host_size = switch (builtin.zig_backend) {
         else => comptime std.math.divCeil(comptime_int, @bitSizeOf(S), 8) catch unreachable,
-        .stage2_x86_64 => @sizeOf(S),
+        .stage2_x86_64, .stage2_c => @sizeOf(S),
     };
     comptime assert(@TypeOf(ap) == *align(4:2:host_size) u20);
     comptime assert(@TypeOf(bp) == *align(1:2:host_size) u20);
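
Note on the deleted airAggregateInit packed-struct loop above: it splits each field's packed bit offset into a byte-aligned frame offset plus a residual bit shift, ORs the shifted low part of the field into that frame slot, and, when the field straddles a slot boundary, ORs the spilled high part (shifted right by slot_bits - bit_off) into the next slot. The following is a minimal C sketch of the same limb-wise insertion, written for this note only; insert_bits and its layout assumptions are illustrative, not the backend's actual code. It assumes a little-endian buffer whose destination bits are already zero, which the codegen guarantees by memsetting the frame first via genInlineMemset.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Insert the low `bits` bits of `value` at absolute bit offset `bit_off`
     * inside `buf`, working in 64-bit limbs the way the removed loop split
     * each store into a byte offset plus a residual shift. */
    static void insert_bits(uint8_t *buf, uint64_t value, unsigned bits, unsigned bit_off) {
        const unsigned limb_bits = 64;
        const unsigned byte_off = bit_off / limb_bits * sizeof(uint64_t);
        const unsigned shift = bit_off % limb_bits;
        const uint64_t mask = bits == 64 ? ~(uint64_t)0 : ((uint64_t)1 << bits) - 1;
        uint64_t limb;

        value &= mask; /* corresponds to truncateRegister dropping the extra bits */

        memcpy(&limb, buf + byte_off, sizeof(limb));
        limb |= value << shift; /* low part: shl then or, the first genBinOpMir pair */
        memcpy(buf + byte_off, &limb, sizeof(limb));

        if (shift != 0 && shift + bits > limb_bits) {
            /* Field straddles a limb: or the spilled high part into the next
             * slot, mirroring the second shr/or pair in the deleted loop. */
            memcpy(&limb, buf + byte_off + sizeof(uint64_t), sizeof(limb));
            limb |= value >> (limb_bits - shift);
            memcpy(buf + byte_off + sizeof(uint64_t), &limb, sizeof(limb));
        }
    }

    int main(void) {
        uint8_t buf[16] = {0};
        insert_bits(buf, 0x3ff, 10, 60); /* 10-bit field straddling the first limb */
        uint64_t lo, hi;
        memcpy(&lo, buf, sizeof(lo));
        memcpy(&hi, buf + 8, sizeof(hi));
        assert(lo == 0xf000000000000000ull); /* low 4 bits landed at bits 60..63 */
        assert(hi == 0x3f);                  /* high 6 bits spilled into the next limb */
        return 0;
    }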
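Note on the zig.h hunks above: the fallback paths no longer call the removed __addoXi4/__suboXi4 compiler_rt routines; they inline the sign-bit identity those routines used (Hacker's Delight, Overflow Detection). For sum = lhs + rhs computed with wraparound, signed overflow occurred iff ((sum ^ lhs) & (sum ^ rhs)) < 0; for diff = lhs - rhs, iff ((lhs ^ rhs) & (diff ^ lhs)) < 0. A self-contained C illustration follows; checked_add_i32 and checked_sub_i32 are names invented for this note, and two's complement conversion on the back-casts is assumed, the same assumption zig.h already makes:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Wrapping add on the unsigned type (well defined in C). Overflow iff
     * lhs and rhs share a sign that the wrapped sum does not have. */
    static bool checked_add_i32(int32_t lhs, int32_t rhs, int32_t *sum) {
        *sum = (int32_t)((uint32_t)lhs + (uint32_t)rhs);
        return ((*sum ^ lhs) & (*sum ^ rhs)) < 0;
    }

    /* Overflow iff the operands have opposite signs and the wrapped
     * difference does not have the sign of lhs. */
    static bool checked_sub_i32(int32_t lhs, int32_t rhs, int32_t *diff) {
        *diff = (int32_t)((uint32_t)lhs - (uint32_t)rhs);
        return ((lhs ^ rhs) & (*diff ^ lhs)) < 0;
    }

    int main(void) {
        int32_t r;
        assert(!checked_add_i32(1, 2, &r) && r == 3);
        assert(checked_add_i32(INT32_MAX, 1, &r));  /* wraps to INT32_MIN */
        assert(checked_sub_i32(INT32_MIN, 1, &r));  /* wraps to INT32_MAX */
        assert(!checked_sub_i32(INT32_MIN, INT32_MIN, &r) && r == 0);
        return 0;
    }

The zig_addo_iN/zig_subo_iN helpers additionally clamp against zig_minInt_i/zig_maxInt_i because `bits` may be narrower than the storage type, so an in-range machine result can still overflow the Zig integer width.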