Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit. Hold shift + click to select a range.
af1191e
x86_64: rewrite
jacobly0 Nov 25, 2024
beadf70
x86_64: rewrite arithmetic
jacobly0 Nov 8, 2024
7c71325
x86_64: looped instructions
jacobly0 Nov 9, 2024
c4b9355
x86_64: testing
jacobly0 Dec 2, 2024
b9c4400
x86_64: implement fallback for pcmpeqq
jacobly0 Dec 21, 2024
73a4295
x86_64: 2 means better
jacobly0 Dec 23, 2024
a1828eb
x86_64: demolish the old
jacobly0 Dec 23, 2024
a7efc56
x86_64: the previous loop abstraction was too confusing
jacobly0 Dec 27, 2024
ac1a975
x86_64: implement clz and not
jacobly0 Dec 29, 2024
e5d5a8b
x86_64: implement switch jump tables
jacobly0 Jan 2, 2025
df7661b
x86_64: optimize value copying slightly
jacobly0 Jan 2, 2025
7f22c41
x86_64: add some ReleaseSmall support
jacobly0 Jan 3, 2025
3c74a47
x86_64: fix unnecessary register saving
jacobly0 Jan 3, 2025
074232b
x86_64: implement a custom calling convention for the Zig language
jacobly0 Jan 3, 2025
b7acd97
x86_64: fix hazards exposed by new calling convention
jacobly0 Jan 3, 2025
094ac8c
x86_64: fix f16 miscomp exposed by new calling convention
jacobly0 Jan 3, 2025
870443f
x86_64: implement passing undefined as a call arg with the new cc
jacobly0 Jan 3, 2025
0d9079f
x86_64: implement element access
jacobly0 Jan 4, 2025
3240adf
x86_64: implement pointer addition and subtraction
jacobly0 Jan 5, 2025
5069f57
x86_64: remove pointless jump to epilogue
jacobly0 Jan 8, 2025
6373044
x86_64: implement union access
jacobly0 Jan 8, 2025
6d1fc0f
x86_64: implement aggregate access
jacobly0 Jan 9, 2025
666d76d
x86_64: implement load and store
jacobly0 Jan 9, 2025
c3d3344
x86_64: pass more behavior tests
jacobly0 Jan 9, 2025
8c8dfb3
x86_64: fix crashes compiling the compiler and tests
jacobly0 Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions lib/std/Target/Query.zig
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
/// `null` means native.
cpu_arch: ?Target.Cpu.Arch = null,

cpu_model: CpuModel = CpuModel.determined_by_arch_os,
cpu_model: CpuModel = .determined_by_arch_os,

/// Sparse set of CPU features to add to the set from `cpu_model`.
cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
cpu_features_add: Target.Cpu.Feature.Set = .empty,

/// Sparse set of CPU features to remove from the set from `cpu_model`.
cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
cpu_features_sub: Target.Cpu.Feature.Set = .empty,

/// `null` means native.
os_tag: ?Target.Os.Tag = null,
Expand All @@ -38,7 +38,7 @@ abi: ?Target.Abi = null,

/// When `os_tag` is `null`, then `null` means native. Otherwise it means the standard path
/// based on the `os_tag`.
dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none,
dynamic_linker: Target.DynamicLinker = .none,

/// `null` means default for the cpu/arch/os combo.
ofmt: ?Target.ObjectFormat = null,
Expand Down
65 changes: 65 additions & 0 deletions lib/std/Target/x86.zig
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub const Feature = enum {
bmi2,
branch_hint,
branchfusion,
bsf_bsr_0_clobbers_result,
ccmp,
cf,
cldemote,
Expand Down Expand Up @@ -167,6 +168,8 @@ pub const Feature = enum {
slow_unaligned_mem_32,
sm3,
sm4,
smap,
smep,
soft_float,
sse,
sse2,
Expand Down Expand Up @@ -497,6 +500,11 @@ pub const all_features = blk: {
.description = "CMP/TEST can be fused with conditional branches",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{
.llvm_name = null,
.description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ccmp)] = .{
.llvm_name = "ccmp",
.description = "Support conditional cmp & test instructions",
Expand Down Expand Up @@ -1127,6 +1135,16 @@ pub const all_features = blk: {
.avx2,
}),
};
result[@intFromEnum(Feature.smap)] = .{
.llvm_name = null,
.description = "Enable Supervisor Mode Access Prevention",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.smep)] = .{
.llvm_name = null,
.description = "Enable Supervisor Mode Execution Prevention",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.soft_float)] = .{
.llvm_name = "soft-float",
.description = "Use software floating point features",
Expand Down Expand Up @@ -1371,6 +1389,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
Expand Down Expand Up @@ -1467,6 +1487,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
Expand Down Expand Up @@ -1545,6 +1567,8 @@ pub const cpu = struct {
.slow_3ops_lea,
.sm3,
.sm4,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
Expand Down Expand Up @@ -1783,6 +1807,8 @@ pub const cpu = struct {
.sahf,
.sbb_dep_breaking,
.slow_shld,
.smap,
.smep,
.sse4a,
.vzeroupper,
.x87,
Expand Down Expand Up @@ -1995,6 +2021,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
.smap,
.smep,
.vzeroupper,
.x87,
.xsaveopt,
Expand Down Expand Up @@ -2136,6 +2164,8 @@ pub const cpu = struct {
.sahf,
.sha,
.slow_3ops_lea,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
Expand Down Expand Up @@ -2195,6 +2225,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
Expand Down Expand Up @@ -2450,6 +2482,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
.smap,
.smep,
.tsxldtrk,
.tuning_fast_imm_vector_shift,
.uintr,
Expand Down Expand Up @@ -2519,6 +2553,8 @@ pub const cpu = struct {
.slow_incdec,
.slow_lea,
.slow_two_mem_ops,
.smap,
.smep,
.sse4_2,
.use_glm_div_sqrt_costs,
.vzeroupper,
Expand Down Expand Up @@ -2898,6 +2934,7 @@ pub const cpu = struct {
.rdrnd,
.sahf,
.slow_3ops_lea,
.smep,
.vzeroupper,
.x87,
.xsaveopt,
Expand All @@ -2907,6 +2944,7 @@ pub const cpu = struct {
.name = "i386",
.llvm_name = "i386",
.features = featureSet(&[_]Feature{
.bsf_bsr_0_clobbers_result,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
Expand All @@ -2916,6 +2954,7 @@ pub const cpu = struct {
.name = "i486",
.llvm_name = "i486",
.features = featureSet(&[_]Feature{
.bsf_bsr_0_clobbers_result,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
Expand Down Expand Up @@ -3096,6 +3135,7 @@ pub const cpu = struct {
.sahf,
.slow_3ops_lea,
.slow_unaligned_mem_32,
.smep,
.vzeroupper,
.x87,
.xsaveopt,
Expand Down Expand Up @@ -3403,6 +3443,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
Expand Down Expand Up @@ -3766,6 +3808,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
Expand Down Expand Up @@ -3831,6 +3875,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.sha,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
Expand Down Expand Up @@ -3939,6 +3985,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
.smap,
.smep,
.tsxldtrk,
.tuning_fast_imm_vector_shift,
.uintr,
Expand Down Expand Up @@ -4042,6 +4090,7 @@ pub const cpu = struct {
.slow_lea,
.slow_pmulld,
.slow_two_mem_ops,
.smep,
.sse4_2,
.use_slm_arith_costs,
.vzeroupper,
Expand Down Expand Up @@ -4098,6 +4147,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
Expand Down Expand Up @@ -4150,6 +4201,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
.smap,
.smep,
.vzeroupper,
.x87,
.xsavec,
Expand Down Expand Up @@ -4305,6 +4358,8 @@ pub const cpu = struct {
.sahf,
.sha,
.shstk,
.smap,
.smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
Expand Down Expand Up @@ -4574,6 +4629,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
.smap,
.smep,
.sse4a,
.vzeroupper,
.x87,
Expand Down Expand Up @@ -4629,6 +4686,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
.smap,
.smep,
.sse4a,
.vzeroupper,
.wbnoinvd,
Expand Down Expand Up @@ -4686,6 +4745,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
.smap,
.smep,
.sse4a,
.vaes,
.vpclmulqdq,
Expand Down Expand Up @@ -4757,6 +4818,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_shld,
.smap,
.smep,
.sse4a,
.vaes,
.vpclmulqdq,
Expand Down Expand Up @@ -4833,6 +4896,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_shld,
.smap,
.smep,
.sse4a,
.vaes,
.vpclmulqdq,
Expand Down
4 changes: 3 additions & 1 deletion lib/std/Thread.zig
Original file line number Diff line number Diff line change
Expand Up @@ -372,9 +372,11 @@ pub const SpawnConfig = struct {
// https://github.com/ziglang/zig/issues/157

/// Size in bytes of the Thread's stack
stack_size: usize = 16 * 1024 * 1024,
stack_size: usize = default_stack_size,
/// The allocator to be used to allocate memory for the to-be-spawned thread
allocator: ?std.mem.Allocator = null,

pub const default_stack_size = 16 * 1024 * 1024;
};

pub const SpawnError = error{
Expand Down
6 changes: 3 additions & 3 deletions lib/std/Thread/Condition.zig
Original file line number Diff line number Diff line change
Expand Up @@ -161,17 +161,17 @@ const WindowsImpl = struct {
}
}

if (comptime builtin.mode == .Debug) {
if (builtin.mode == .Debug) {
// The internal state of the DebugMutex needs to be handled here as well.
mutex.impl.locking_thread.store(0, .unordered);
}
const rc = os.windows.kernel32.SleepConditionVariableSRW(
&self.condition,
if (comptime builtin.mode == .Debug) &mutex.impl.impl.srwlock else &mutex.impl.srwlock,
if (builtin.mode == .Debug) &mutex.impl.impl.srwlock else &mutex.impl.srwlock,
timeout_ms,
0, // the srwlock is assumed to have been acquired in exclusive mode, not shared
);
if (comptime builtin.mode == .Debug) {
if (builtin.mode == .Debug) {
// The internal state of the DebugMutex needs to be handled here as well.
mutex.impl.locking_thread.store(std.Thread.getCurrentId(), .unordered);
}
Expand Down
2 changes: 1 addition & 1 deletion lib/std/Thread/Mutex.zig
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ const FutexImpl = struct {
// On x86, use `lock bts` instead of `lock cmpxchg` as:
// - they both seem to mark the cache-line as modified regardless: https://stackoverflow.com/a/63350048
// - `lock bts` is smaller instruction-wise which makes it better for inlining
if (comptime builtin.target.cpu.arch.isX86()) {
if (builtin.target.cpu.arch.isX86()) {
const locked_bit = @ctz(locked);
return self.state.bitSet(locked_bit, .acquire) == 0;
}
Expand Down
6 changes: 5 additions & 1 deletion lib/std/Thread/Pool.zig
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub const Options = struct {
allocator: std.mem.Allocator,
n_jobs: ?usize = null,
track_ids: bool = false,
stack_size: usize = std.Thread.SpawnConfig.default_stack_size,
};

pub fn init(pool: *Pool, options: Options) !void {
Expand Down Expand Up @@ -54,7 +55,10 @@ pub fn init(pool: *Pool, options: Options) !void {
errdefer pool.join(spawned);

for (pool.threads) |*thread| {
thread.* = try std.Thread.spawn(.{}, worker, .{pool});
thread.* = try std.Thread.spawn(.{
.stack_size = options.stack_size,
.allocator = allocator,
}, worker, .{pool});
spawned += 1;
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/std/crypto/aes/aesni.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const mem = std.mem;
const debug = std.debug;

const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes);
const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
const has_avx512f = builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);

/// A single AES block.
pub const Block = struct {
Expand Down
4 changes: 1 addition & 3 deletions lib/std/crypto/chacha20.zig
Original file line number Diff line number Diff line change
Expand Up @@ -499,11 +499,9 @@ fn ChaChaNonVecImpl(comptime rounds_nb: usize) type {
fn ChaChaImpl(comptime rounds_nb: usize) type {
switch (builtin.cpu.arch) {
.x86_64 => {
if (builtin.zig_backend == .stage2_x86_64) return ChaChaNonVecImpl(rounds_nb);

const has_avx2 = std.Target.x86.featureSetHas(builtin.cpu.features, .avx2);
const has_avx512f = std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
if (has_avx512f) return ChaChaVecImpl(rounds_nb, 4);
if (builtin.zig_backend != .stage2_x86_64 and has_avx512f) return ChaChaVecImpl(rounds_nb, 4);
if (has_avx2) return ChaChaVecImpl(rounds_nb, 2);
return ChaChaVecImpl(rounds_nb, 1);
},
Expand Down
Loading
Loading