Skip to content

Commit 4bace0f

Browse files
authored
Merge pull request #22386 from jacobly0/x86_64-rewrite
x86_64: begin rewriting instruction selection
2 parents 257054a + 8c8dfb3 commit 4bace0f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+16973
-4863
lines changed

lib/std/Target/Query.zig

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
/// `null` means native.
77
cpu_arch: ?Target.Cpu.Arch = null,
88

9-
cpu_model: CpuModel = CpuModel.determined_by_arch_os,
9+
cpu_model: CpuModel = .determined_by_arch_os,
1010

1111
/// Sparse set of CPU features to add to the set from `cpu_model`.
12-
cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
12+
cpu_features_add: Target.Cpu.Feature.Set = .empty,
1313

1414
/// Sparse set of CPU features to remove from the set from `cpu_model`.
15-
cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
15+
cpu_features_sub: Target.Cpu.Feature.Set = .empty,
1616

1717
/// `null` means native.
1818
os_tag: ?Target.Os.Tag = null,
@@ -38,7 +38,7 @@ abi: ?Target.Abi = null,
3838

3939
/// When `os_tag` is `null`, then `null` means native. Otherwise it means the standard path
4040
/// based on the `os_tag`.
41-
dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none,
41+
dynamic_linker: Target.DynamicLinker = .none,
4242

4343
/// `null` means default for the cpu/arch/os combo.
4444
ofmt: ?Target.ObjectFormat = null,

lib/std/Target/x86.zig

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub const Feature = enum {
4747
bmi2,
4848
branch_hint,
4949
branchfusion,
50+
bsf_bsr_0_clobbers_result,
5051
ccmp,
5152
cf,
5253
cldemote,
@@ -167,6 +168,8 @@ pub const Feature = enum {
167168
slow_unaligned_mem_32,
168169
sm3,
169170
sm4,
171+
smap,
172+
smep,
170173
soft_float,
171174
sse,
172175
sse2,
@@ -497,6 +500,11 @@ pub const all_features = blk: {
497500
.description = "CMP/TEST can be fused with conditional branches",
498501
.dependencies = featureSet(&[_]Feature{}),
499502
};
503+
result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{
504+
.llvm_name = null,
505+
.description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero",
506+
.dependencies = featureSet(&[_]Feature{}),
507+
};
500508
result[@intFromEnum(Feature.ccmp)] = .{
501509
.llvm_name = "ccmp",
502510
.description = "Support conditional cmp & test instructions",
@@ -1127,6 +1135,16 @@ pub const all_features = blk: {
11271135
.avx2,
11281136
}),
11291137
};
1138+
result[@intFromEnum(Feature.smap)] = .{
1139+
.llvm_name = null,
1140+
.description = "Enable Supervisor Mode Access Prevention",
1141+
.dependencies = featureSet(&[_]Feature{}),
1142+
};
1143+
result[@intFromEnum(Feature.smep)] = .{
1144+
.llvm_name = null,
1145+
.description = "Enable Supervisor Mode Execution Prevention",
1146+
.dependencies = featureSet(&[_]Feature{}),
1147+
};
11301148
result[@intFromEnum(Feature.soft_float)] = .{
11311149
.llvm_name = "soft-float",
11321150
.description = "Use software floating point features",
@@ -1371,6 +1389,8 @@ pub const cpu = struct {
13711389
.sha,
13721390
.shstk,
13731391
.slow_3ops_lea,
1392+
.smap,
1393+
.smep,
13741394
.tuning_fast_imm_vector_shift,
13751395
.vaes,
13761396
.vpclmulqdq,
@@ -1467,6 +1487,8 @@ pub const cpu = struct {
14671487
.sha,
14681488
.shstk,
14691489
.slow_3ops_lea,
1490+
.smap,
1491+
.smep,
14701492
.tuning_fast_imm_vector_shift,
14711493
.uintr,
14721494
.vaes,
@@ -1545,6 +1567,8 @@ pub const cpu = struct {
15451567
.slow_3ops_lea,
15461568
.sm3,
15471569
.sm4,
1570+
.smap,
1571+
.smep,
15481572
.tuning_fast_imm_vector_shift,
15491573
.uintr,
15501574
.vaes,
@@ -1783,6 +1807,8 @@ pub const cpu = struct {
17831807
.sahf,
17841808
.sbb_dep_breaking,
17851809
.slow_shld,
1810+
.smap,
1811+
.smep,
17861812
.sse4a,
17871813
.vzeroupper,
17881814
.x87,
@@ -1995,6 +2021,8 @@ pub const cpu = struct {
19952021
.rdseed,
19962022
.sahf,
19972023
.slow_3ops_lea,
2024+
.smap,
2025+
.smep,
19982026
.vzeroupper,
19992027
.x87,
20002028
.xsaveopt,
@@ -2136,6 +2164,8 @@ pub const cpu = struct {
21362164
.sahf,
21372165
.sha,
21382166
.slow_3ops_lea,
2167+
.smap,
2168+
.smep,
21392169
.tuning_fast_imm_vector_shift,
21402170
.vzeroupper,
21412171
.x87,
@@ -2195,6 +2225,8 @@ pub const cpu = struct {
21952225
.rdseed,
21962226
.sahf,
21972227
.slow_3ops_lea,
2228+
.smap,
2229+
.smep,
21982230
.tuning_fast_imm_vector_shift,
21992231
.vzeroupper,
22002232
.x87,
@@ -2450,6 +2482,8 @@ pub const cpu = struct {
24502482
.serialize,
24512483
.sha,
24522484
.shstk,
2485+
.smap,
2486+
.smep,
24532487
.tsxldtrk,
24542488
.tuning_fast_imm_vector_shift,
24552489
.uintr,
@@ -2519,6 +2553,8 @@ pub const cpu = struct {
25192553
.slow_incdec,
25202554
.slow_lea,
25212555
.slow_two_mem_ops,
2556+
.smap,
2557+
.smep,
25222558
.sse4_2,
25232559
.use_glm_div_sqrt_costs,
25242560
.vzeroupper,
@@ -2898,6 +2934,7 @@ pub const cpu = struct {
28982934
.rdrnd,
28992935
.sahf,
29002936
.slow_3ops_lea,
2937+
.smep,
29012938
.vzeroupper,
29022939
.x87,
29032940
.xsaveopt,
@@ -2907,6 +2944,7 @@ pub const cpu = struct {
29072944
.name = "i386",
29082945
.llvm_name = "i386",
29092946
.features = featureSet(&[_]Feature{
2947+
.bsf_bsr_0_clobbers_result,
29102948
.slow_unaligned_mem_16,
29112949
.vzeroupper,
29122950
.x87,
@@ -2916,6 +2954,7 @@ pub const cpu = struct {
29162954
.name = "i486",
29172955
.llvm_name = "i486",
29182956
.features = featureSet(&[_]Feature{
2957+
.bsf_bsr_0_clobbers_result,
29192958
.slow_unaligned_mem_16,
29202959
.vzeroupper,
29212960
.x87,
@@ -3096,6 +3135,7 @@ pub const cpu = struct {
30963135
.sahf,
30973136
.slow_3ops_lea,
30983137
.slow_unaligned_mem_32,
3138+
.smep,
30993139
.vzeroupper,
31003140
.x87,
31013141
.xsaveopt,
@@ -3403,6 +3443,8 @@ pub const cpu = struct {
34033443
.sha,
34043444
.shstk,
34053445
.slow_3ops_lea,
3446+
.smap,
3447+
.smep,
34063448
.tuning_fast_imm_vector_shift,
34073449
.vaes,
34083450
.vpclmulqdq,
@@ -3766,6 +3808,8 @@ pub const cpu = struct {
37663808
.sha,
37673809
.shstk,
37683810
.slow_3ops_lea,
3811+
.smap,
3812+
.smep,
37693813
.tuning_fast_imm_vector_shift,
37703814
.vaes,
37713815
.vpclmulqdq,
@@ -3831,6 +3875,8 @@ pub const cpu = struct {
38313875
.rdseed,
38323876
.sahf,
38333877
.sha,
3878+
.smap,
3879+
.smep,
38343880
.tuning_fast_imm_vector_shift,
38353881
.vaes,
38363882
.vpclmulqdq,
@@ -3939,6 +3985,8 @@ pub const cpu = struct {
39393985
.serialize,
39403986
.sha,
39413987
.shstk,
3988+
.smap,
3989+
.smep,
39423990
.tsxldtrk,
39433991
.tuning_fast_imm_vector_shift,
39443992
.uintr,
@@ -4042,6 +4090,7 @@ pub const cpu = struct {
40424090
.slow_lea,
40434091
.slow_pmulld,
40444092
.slow_two_mem_ops,
4093+
.smep,
40454094
.sse4_2,
40464095
.use_slm_arith_costs,
40474096
.vzeroupper,
@@ -4098,6 +4147,8 @@ pub const cpu = struct {
40984147
.rdseed,
40994148
.sahf,
41004149
.slow_3ops_lea,
4150+
.smap,
4151+
.smep,
41014152
.tuning_fast_imm_vector_shift,
41024153
.vzeroupper,
41034154
.x87,
@@ -4150,6 +4201,8 @@ pub const cpu = struct {
41504201
.rdseed,
41514202
.sahf,
41524203
.slow_3ops_lea,
4204+
.smap,
4205+
.smep,
41534206
.vzeroupper,
41544207
.x87,
41554208
.xsavec,
@@ -4305,6 +4358,8 @@ pub const cpu = struct {
43054358
.sahf,
43064359
.sha,
43074360
.shstk,
4361+
.smap,
4362+
.smep,
43084363
.tuning_fast_imm_vector_shift,
43094364
.vaes,
43104365
.vpclmulqdq,
@@ -4574,6 +4629,8 @@ pub const cpu = struct {
45744629
.sbb_dep_breaking,
45754630
.sha,
45764631
.slow_shld,
4632+
.smap,
4633+
.smep,
45774634
.sse4a,
45784635
.vzeroupper,
45794636
.x87,
@@ -4629,6 +4686,8 @@ pub const cpu = struct {
46294686
.sbb_dep_breaking,
46304687
.sha,
46314688
.slow_shld,
4689+
.smap,
4690+
.smep,
46324691
.sse4a,
46334692
.vzeroupper,
46344693
.wbnoinvd,
@@ -4686,6 +4745,8 @@ pub const cpu = struct {
46864745
.sbb_dep_breaking,
46874746
.sha,
46884747
.slow_shld,
4748+
.smap,
4749+
.smep,
46894750
.sse4a,
46904751
.vaes,
46914752
.vpclmulqdq,
@@ -4757,6 +4818,8 @@ pub const cpu = struct {
47574818
.sha,
47584819
.shstk,
47594820
.slow_shld,
4821+
.smap,
4822+
.smep,
47604823
.sse4a,
47614824
.vaes,
47624825
.vpclmulqdq,
@@ -4833,6 +4896,8 @@ pub const cpu = struct {
48334896
.sha,
48344897
.shstk,
48354898
.slow_shld,
4899+
.smap,
4900+
.smep,
48364901
.sse4a,
48374902
.vaes,
48384903
.vpclmulqdq,

lib/std/Thread.zig

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,9 +372,11 @@ pub const SpawnConfig = struct {
372372
// https://github.com/ziglang/zig/issues/157
373373

374374
/// Size in bytes of the Thread's stack
375-
stack_size: usize = 16 * 1024 * 1024,
375+
stack_size: usize = default_stack_size,
376376
/// The allocator to be used to allocate memory for the to-be-spawned thread
377377
allocator: ?std.mem.Allocator = null,
378+
379+
pub const default_stack_size = 16 * 1024 * 1024;
378380
};
379381

380382
pub const SpawnError = error{

lib/std/Thread/Condition.zig

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,17 +161,17 @@ const WindowsImpl = struct {
161161
}
162162
}
163163

164-
if (comptime builtin.mode == .Debug) {
164+
if (builtin.mode == .Debug) {
165165
// The internal state of the DebugMutex needs to be handled here as well.
166166
mutex.impl.locking_thread.store(0, .unordered);
167167
}
168168
const rc = os.windows.kernel32.SleepConditionVariableSRW(
169169
&self.condition,
170-
if (comptime builtin.mode == .Debug) &mutex.impl.impl.srwlock else &mutex.impl.srwlock,
170+
if (builtin.mode == .Debug) &mutex.impl.impl.srwlock else &mutex.impl.srwlock,
171171
timeout_ms,
172172
0, // the srwlock is assumed to have been acquired in exclusive mode, not shared
173173
);
174-
if (comptime builtin.mode == .Debug) {
174+
if (builtin.mode == .Debug) {
175175
// The internal state of the DebugMutex needs to be handled here as well.
176176
mutex.impl.locking_thread.store(std.Thread.getCurrentId(), .unordered);
177177
}

lib/std/Thread/Mutex.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ const FutexImpl = struct {
158158
// On x86, use `lock bts` instead of `lock cmpxchg` as:
159159
// - they both seem to mark the cache-line as modified regardless: https://stackoverflow.com/a/63350048
160160
// - `lock bts` is smaller instruction-wise which makes it better for inlining
161-
if (comptime builtin.target.cpu.arch.isX86()) {
161+
if (builtin.target.cpu.arch.isX86()) {
162162
const locked_bit = @ctz(locked);
163163
return self.state.bitSet(locked_bit, .acquire) == 0;
164164
}

lib/std/Thread/Pool.zig

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ pub const Options = struct {
2727
allocator: std.mem.Allocator,
2828
n_jobs: ?usize = null,
2929
track_ids: bool = false,
30+
stack_size: usize = std.Thread.SpawnConfig.default_stack_size,
3031
};
3132

3233
pub fn init(pool: *Pool, options: Options) !void {
@@ -54,7 +55,10 @@ pub fn init(pool: *Pool, options: Options) !void {
5455
errdefer pool.join(spawned);
5556

5657
for (pool.threads) |*thread| {
57-
thread.* = try std.Thread.spawn(.{}, worker, .{pool});
58+
thread.* = try std.Thread.spawn(.{
59+
.stack_size = options.stack_size,
60+
.allocator = allocator,
61+
}, worker, .{pool});
5862
spawned += 1;
5963
}
6064
}

lib/std/crypto/aes/aesni.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ const mem = std.mem;
44
const debug = std.debug;
55

66
const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes);
7-
const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
7+
const has_avx512f = builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
88

99
/// A single AES block.
1010
pub const Block = struct {

lib/std/crypto/chacha20.zig

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -499,11 +499,9 @@ fn ChaChaNonVecImpl(comptime rounds_nb: usize) type {
499499
fn ChaChaImpl(comptime rounds_nb: usize) type {
500500
switch (builtin.cpu.arch) {
501501
.x86_64 => {
502-
if (builtin.zig_backend == .stage2_x86_64) return ChaChaNonVecImpl(rounds_nb);
503-
504502
const has_avx2 = std.Target.x86.featureSetHas(builtin.cpu.features, .avx2);
505503
const has_avx512f = std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
506-
if (has_avx512f) return ChaChaVecImpl(rounds_nb, 4);
504+
if (builtin.zig_backend != .stage2_x86_64 and has_avx512f) return ChaChaVecImpl(rounds_nb, 4);
507505
if (has_avx2) return ChaChaVecImpl(rounds_nb, 2);
508506
return ChaChaVecImpl(rounds_nb, 1);
509507
},

0 commit comments

Comments
 (0)