JuliaLang · vtjnash · May 7, 2025 · Nov 25, 2024 · Jan 9, 2025
diff --git a/base/atomics.jl b/base/atomics.jl
@@ -1,7 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Core.Intrinsics: llvmcall
-
 import .Base: setindex!, getindex, unsafe_convert
 import .Base.Sys: ARCH, WORD_SIZE
 
@@ -13,45 +11,13 @@ export
     atomic_and!, atomic_nand!, atomic_or!, atomic_xor!,
     atomic_max!, atomic_min!,
     atomic_fence
-##
-# Filter out unsupported atomic types on platforms
-# - 128-bit atomics do not exist on AArch32.
-# - Omitting 128-bit types on 32bit x86 and ppc64
-# - LLVM doesn't currently support atomics on floats for ppc64
-#   C++20 is adding limited support for atomics on float, but as of
-#   now Clang does not support that yet.
-if Sys.ARCH === :i686 || startswith(string(Sys.ARCH), "arm") ||
-   Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
-    const inttypes = (Int8, Int16, Int32, Int64,
-                      UInt8, UInt16, UInt32, UInt64)
-else
-    const inttypes = (Int8, Int16, Int32, Int64, Int128,
-                      UInt8, UInt16, UInt32, UInt64, UInt128)
-end
-const floattypes = (Float16, Float32, Float64)
-const arithmetictypes = (inttypes..., floattypes...)
-# TODO: Support Ptr
-if Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le
-    const atomictypes = (inttypes..., Bool)
-else
-    const atomictypes = (arithmetictypes..., Bool)
-end
-
-const IntTypes = Union{inttypes...}
-const FloatTypes = Union{floattypes...}
-const ArithmeticTypes = Union{arithmetictypes...}
-const AtomicTypes = Union{atomictypes...}
 
 """
     Threads.Atomic{T}
 
 Holds a reference to an object of type `T`, ensuring that it is only
 accessed atomically, i.e. in a thread-safe manner.
 
-Only certain "simple" types can be used atomically, namely the
-primitive boolean, integer, and float-point types. These are `Bool`,
-`Int8`...`Int128`, `UInt8`...`UInt128`, and `Float16`...`Float64`.
-
 New atomic objects can be created from a non-atomic values; if none is
 specified, the atomic object is initialized with zero.
 
@@ -72,10 +38,10 @@ julia> x[]
 Atomic operations use an `atomic_` prefix, such as [`atomic_add!`](@ref),
 [`atomic_xchg!`](@ref), etc.
 """
-mutable struct Atomic{T<:AtomicTypes}
-    value::T
-    Atomic{T}() where {T<:AtomicTypes} = new(zero(T))
-    Atomic{T}(value) where {T<:AtomicTypes} = new(value)
+mutable struct Atomic{T}
+    @atomic value::T
+    Atomic{T}() where {T} = new(zero(T))
+    Atomic{T}(value) where {T} = new(value)
 end
 
 Atomic() = Atomic{Int}()
@@ -332,120 +298,21 @@ julia> x[]
 """
 function atomic_min! end
 
-unsafe_convert(::Type{Ptr{T}}, x::Atomic{T}) where {T} = convert(Ptr{T}, pointer_from_objref(x))
-setindex!(x::Atomic{T}, v) where {T} = setindex!(x, convert(T, v))
-
-const llvmtypes = IdDict{Any,String}(
-    Bool => "i8",  # julia represents bools with 8-bits for now. # TODO: is this okay?
-    Int8 => "i8", UInt8 => "i8",
-    Int16 => "i16", UInt16 => "i16",
-    Int32 => "i32", UInt32 => "i32",
-    Int64 => "i64", UInt64 => "i64",
-    Int128 => "i128", UInt128 => "i128",
-    Float16 => "half",
-    Float32 => "float",
-    Float64 => "double",
-)
-inttype(::Type{T}) where {T<:Integer} = T
-inttype(::Type{Float16}) = Int16
-inttype(::Type{Float32}) = Int32
-inttype(::Type{Float64}) = Int64
-
-
-import ..Base.gc_alignment
-
-# All atomic operations have acquire and/or release semantics, depending on
-# whether the load or store values. Most of the time, this is what one wants
-# anyway, and it's only moderately expensive on most hardware.
-for typ in atomictypes
-    lt = llvmtypes[typ]
-    ilt = llvmtypes[inttype(typ)]
-    rt = "$lt, $lt*"
-    irt = "$ilt, $ilt*"
-    @eval getindex(x::Atomic{$typ}) =
-        GC.@preserve x llvmcall($"""
-                 %ptr = bitcast i8* %0 to $lt*
-                 %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
-                 ret $lt %rv
-                 """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
-    @eval setindex!(x::Atomic{$typ}, v::$typ) =
-        GC.@preserve x llvmcall($"""
-                 %ptr = bitcast i8* %0 to $lt*
-                 store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
-                 ret void
-                 """, Cvoid, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
-
-    # Note: atomic_cas! succeeded (i.e. it stored "new") if and only if the result is "cmp"
-    if typ <: Integer
-        @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            GC.@preserve x llvmcall($"""
-                     %ptr = bitcast i8* %0 to $lt*
-                     %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire
-                     %rv = extractvalue { $lt, i1 } %rs, 0
-                     ret $lt %rv
-                     """, $typ, Tuple{Ptr{$typ},$typ,$typ},
-                     unsafe_convert(Ptr{$typ}, x), cmp, new)
-    else
-        @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            GC.@preserve x llvmcall($"""
-                     %iptr = bitcast i8* %0 to $ilt*
-                     %icmp = bitcast $lt %1 to $ilt
-                     %inew = bitcast $lt %2 to $ilt
-                     %irs = cmpxchg $ilt* %iptr, $ilt %icmp, $ilt %inew acq_rel acquire
-                     %irv = extractvalue { $ilt, i1 } %irs, 0
-                     %rv = bitcast $ilt %irv to $lt
-                     ret $lt %rv
-                     """, $typ, Tuple{Ptr{$typ},$typ,$typ},
-                     unsafe_convert(Ptr{$typ}, x), cmp, new)
-    end
-
-    arithmetic_ops = [:add, :sub]
-    for rmwop in [arithmetic_ops..., :xchg, :and, :nand, :or, :xor, :max, :min]
-        rmw = string(rmwop)
-        fn = Symbol("atomic_", rmw, "!")
-        if (rmw == "max" || rmw == "min") && typ <: Unsigned
-            # LLVM distinguishes signedness in the operation, not the integer type.
-            rmw = "u" * rmw
-        end
-        if rmwop in arithmetic_ops && !(typ <: ArithmeticTypes) continue end
-        if typ <: Integer
-            @eval $fn(x::Atomic{$typ}, v::$typ) =
-                GC.@preserve x llvmcall($"""
-                         %ptr = bitcast i8* %0 to $lt*
-                         %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel
-                         ret $lt %rv
-                         """, $typ, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
-        else
-            rmwop === :xchg || continue
-            @eval $fn(x::Atomic{$typ}, v::$typ) =
-                GC.@preserve x llvmcall($"""
-                         %iptr = bitcast i8* %0 to $ilt*
-                         %ival = bitcast $lt %1 to $ilt
-                         %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel
-                         %rv = bitcast $ilt %irv to $lt
-                         ret $lt %rv
-                         """, $typ, Tuple{Ptr{$typ}, $typ}, unsafe_convert(Ptr{$typ}, x), v)
-        end
-    end
-end
-
-# Provide atomic floating-point operations via atomic_cas!
-const opnames = Dict{Symbol, Symbol}(:+ => :add, :- => :sub)
-for op in [:+, :-, :max, :min]
-    opname = get(opnames, op, op)
-    @eval function $(Symbol("atomic_", opname, "!"))(var::Atomic{T}, val::T) where T<:FloatTypes
-        IT = inttype(T)
-        old = var[]
-        while true
-            new = $op(old, val)
-            cmp = old
-            old = atomic_cas!(var, cmp, new)
-            reinterpret(IT, old) == reinterpret(IT, cmp) && return old
-            # Temporary solution before we have gc transition support in codegen.
-            ccall(:jl_gc_safepoint, Cvoid, ())
-        end
-    end
-end
+#const nand = (~) ∘ (&) # ComposedFunction generated very poor code quality
+nand(x, y) = ~(x & y)
+
+getindex(x::Atomic) = @atomic :acquire x.value
+setindex!(x::Atomic, v) = (@atomic :release x.value = v; x)
+atomic_cas!(x::Atomic, cmp, new) = (@atomicreplace :acquire_release :acquire x.value cmp => new).old
+atomic_add!(x::Atomic, v) = (@atomic :acquire_release x.value + v).first
+atomic_sub!(x::Atomic, v) = (@atomic :acquire_release x.value - v).first
+atomic_and!(x::Atomic, v) = (@atomic :acquire_release x.value & v).first
+atomic_or!(x::Atomic, v) = (@atomic :acquire_release x.value | v).first
+atomic_xor!(x::Atomic, v) = (@atomic :acquire_release x.value ⊻ v).first
+atomic_nand!(x::Atomic, v) = (@atomic :acquire_release x.value nand v).first
+atomic_xchg!(x::Atomic, v) = (@atomicswap :acquire_release x.value = v)
+atomic_min!(x::Atomic, v) = (@atomic :acquire_release x.value min v).first
+atomic_max!(x::Atomic, v) = (@atomic :acquire_release x.value max v).first
 
 """
     Threads.atomic_fence()
@@ -462,7 +329,4 @@ fences should not be necessary in most cases.
 
 For further details, see LLVM's `fence` instruction.
 """
-atomic_fence() = llvmcall("""
-                          fence seq_cst
-                          ret void
-                          """, Cvoid, Tuple{})
+atomic_fence() = Core.Intrinsics.atomic_fence(:sequentially_consistent)
diff --git a/src/Makefile b/src/Makefile
@@ -79,7 +79,8 @@ endif
 CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \
 	llvm-pass-helpers llvm-ptls llvm-propagate-addrspaces null_sysimage \
 	llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \
-	llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline llvm_api \
+	llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures llvm-expand-atomic-modify \
+	pipeline llvm_api \
 	$(GC_CODEGEN_SRCS)
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
 CG_LLVM_LIBS := all
@@ -338,7 +339,7 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
 		$(call cygpath_w,$(SRCDIR)/mk_julia_flisp_boot.scm) $(call cygpath_w,$(dir $<)) $(notdir $<) $(call cygpath_w,$@))
 
 # additional dependency links
-$(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h
+$(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(addprefix $(SRCDIR)/,intrinsics.h llvm-julia-passes.inc)
 $(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
 $(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/idset.c $(SRCDIR)/builtin_proto.h
@@ -378,7 +379,8 @@ $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $
 $(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/precompile_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(SRCDIR)/common_symbols1.inc $(SRCDIR)/common_symbols2.inc
-$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h
+$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(addprefix $(SRCDIR)/,passes.h jitlayers.h llvm-julia-passes.inc)
+$(BUILDDIR)/llvm_api.o $(BUILDDIR)/llvm_api.dbg.obj: $(SRCDIR)/llvm-julia-passes.inc
 
 $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc-common.o gc-stock.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h)
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h