[MTE] [NFC] use vector to collect globals to tag (#120283) #142329

Closed · wants to merge 8 commits
8 changes: 4 additions & 4 deletions llvm/docs/LangRef.rst
@@ -11244,8 +11244,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering
<ordering>` and optional ``syncscope("<target-scope>")`` argument. The
``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions.
Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
eight and less than or equal to a target-specific size limit. ``align`` must be
explicitly specified on atomic loads. Note: if the alignment is not greater or
equal to the size of the `<value>` type, the atomic operation is likely to
@@ -11385,8 +11385,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering
<ordering>` and optional ``syncscope("<target-scope>")`` argument. The
``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions.
Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
eight and less than or equal to a target-specific size limit. ``align`` must be
explicitly specified on atomic stores. Note: if the alignment is not greater or
equal to the size of the `<value>` type, the atomic operation is likely to
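
For illustration (not part of this diff), a minimal IR sketch of the forms the relaxed wording now permits; the function name and alignment are placeholders:

```llvm
define <4 x i32> @atomic_vec4_i32(ptr %p) {
  ; a 128-bit atomic vector load, now covered by the wording above
  %v = load atomic <4 x i32>, ptr %p acquire, align 16
  ; the matching atomic vector store
  store atomic <4 x i32> %v, ptr %p release, align 16
  ret <4 x i32> %v
}
```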
1 change: 1 addition & 0 deletions llvm/docs/ReleaseNotes.md
@@ -65,6 +65,7 @@ Changes to the LLVM IR
removed:

* `mul`
* A `load atomic` may now be used with vector types.

* Updated semantics of `llvm.type.checked.load.relative` to match that of
`llvm.load.relative`.
14 changes: 14 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1904,6 +1904,20 @@ def atomic_load_64 :
let MemoryVT = i64;
}

def atomic_load_128_v2i64 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
let IsAtomic = true;
let MemoryVT = v2i64;
}

def atomic_load_128_v4i32 :
PatFrag<(ops node:$ptr),
(atomic_load node:$ptr)> {
let IsAtomic = true;
let MemoryVT = v4i32;
}

def atomic_load_nonext_8 :
PatFrag<(ops node:$ptr), (atomic_load_nonext node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
15 changes: 12 additions & 3 deletions llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
I->replaceAllUsesWith(V);
} else if (HasResult) {
Value *V;
-    if (UseSizedLibcall)
-      V = Builder.CreateBitOrPointerCast(Result, I->getType());
-    else {
+    if (UseSizedLibcall) {
+      // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
+      auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
+      auto *VTy = dyn_cast<VectorType>(I->getType());
+      if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
+        unsigned AS = PtrTy->getAddressSpace();
+        Value *BC = Builder.CreateBitCast(
+            Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+        V = Builder.CreateIntToPtr(BC, I->getType());
+      } else
+        V = Builder.CreateBitOrPointerCast(Result, I->getType());
+    } else {
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
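
A rough sketch (not taken from the patch) of what the new cast path produces when a `load atomic <2 x ptr>` is expanded through the sized `__atomic_load_16` libcall; the value names and ordering argument are illustrative:

```llvm
declare i128 @__atomic_load_16(ptr, i32)

define <2 x ptr> @atomic_vec2_ptr_libcall(ptr %x) {
  ; the sized libcall returns the raw 128-bit payload as a scalar integer
  %raw = call i128 @__atomic_load_16(ptr %x, i32 5)
  ; CreateBitCast: scalar payload to a vector of pointer-sized integers
  %ints = bitcast i128 %raw to <2 x i64>
  ; CreateIntToPtr: convert back to the original <2 x ptr> result type
  %ret = inttoptr <2 x i64> %ints to <2 x ptr>
  ret <2 x ptr> %ret
}
```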
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -879,6 +879,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
@@ -1067,6 +1068,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
112 changes: 89 additions & 23 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::ATOMIC_LOAD:
R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -455,6 +458,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
return Op;
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
SDValue Result = DAG.getAtomicLoad(
ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
N->getMemOperand());

// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
return Result;
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");

@@ -4607,6 +4622,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::ATOMIC_LOAD:
Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
@@ -5988,6 +6006,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
N->getOperand(1), N->getOperand(2));
}

/// Either return the same load or provide appropriate casts
/// from the load and return that.
static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
TypeSize LdWidth, TypeSize FirstVTWidth,
SDLoc dl, SelectionDAG &DAG) {
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
TypeSize WidenWidth = WidenVT.getSizeInBits();
if (!FirstVT.isVector()) {
unsigned NumElts =
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
assert(FirstVT == WidenVT);
return LdOp;
}

static std::optional<EVT> findMemType(SelectionDAG &DAG,
const TargetLowering &TLI, unsigned Width,
EVT WidenVT, unsigned Align,
unsigned WidenEx);

SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
EVT WidenVT =
TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
"Must be scalable");
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
"Expected equivalent element types");

// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();

TypeSize LdWidth = LdVT.getSizeInBits();
TypeSize WidenWidth = WidenVT.getSizeInBits();
TypeSize WidthDiff = WidenWidth - LdWidth;

// Find the vector type that can load from.
std::optional<EVT> FirstVT =
findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
WidthDiff.getKnownMinValue());

if (!FirstVT)
return SDValue();

SmallVector<EVT, 8> MemVTs;
TypeSize FirstVTWidth = FirstVT->getSizeInBits();

SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
Chain, BasePtr, LD->getMemOperand());

// Load the element with one instruction.
SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
FirstVTWidth, dl, DAG);

// Modified the chain - switch anything that used the old chain to use
// the new one.
ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1));
return Result;
}

SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7879,29 +7965,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LdChain.push_back(LdOp.getValue(1));

// Check if we can load the element with one instruction.
-  if (MemVTs.empty()) {
-    assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
-    if (!FirstVT->isVector()) {
-      unsigned NumElts =
-          WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
-      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
-      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
-      return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
-    }
-    if (FirstVT == WidenVT)
-      return LdOp;
-
-    // TODO: We don't currently have any tests that exercise this code path.
-    assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
-    unsigned NumConcat =
-        WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
-    SmallVector<SDValue, 16> ConcatOps(NumConcat);
-    SDValue UndefVal = DAG.getUNDEF(*FirstVT);
-    ConcatOps[0] = LdOp;
-    for (unsigned i = 1; i != NumConcat; ++i)
-      ConcatOps[i] = UndefVal;
-    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
-  }
+  if (MemVTs.empty())
+    return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl,
+                             DAG);

// Load vector by using multiple loads from largest vector to scalar.
SmallVector<SDValue, 16> LdOps;
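
A rough guide (a sketch, not from the patch) to which IR inputs reach the two new result-legalization paths on a target where these vector types are illegal:

```llvm
; Single-element vectors are scalarized: ScalarizeVecRes_ATOMIC_LOAD rewrites
; the ATOMIC_LOAD node to load the lone element type (here i64) directly.
define <1 x i64> @atomic_vec1_i64(ptr %p) {
  %v = load atomic <1 x i64>, ptr %p acquire, align 8
  ret <1 x i64> %v
}

; Narrow vectors are widened: WidenVecRes_ATOMIC_LOAD loads a single wider
; element chosen by findMemType and coerceLoadedValue casts it into the
; widened vector result.
define <2 x i8> @atomic_vec2_i8(ptr %p) {
  %v = load atomic <2 x i8>, ptr %p acquire, align 2
  ret <2 x i8> %v
}
```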
14 changes: 8 additions & 6 deletions llvm/lib/IR/Verifier.cpp
@@ -4323,9 +4323,10 @@ void Verifier::visitLoadInst(LoadInst &LI) {
Check(LI.getOrdering() != AtomicOrdering::Release &&
LI.getOrdering() != AtomicOrdering::AcquireRelease,
"Load cannot have Release ordering", &LI);
-  Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
-        "atomic load operand must have integer, pointer, or floating point "
-        "type!",
+  Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+            ElTy->getScalarType()->isFloatingPointTy(),
+        "atomic load operand must have integer, pointer, floating point, "
+        "or vector type!",
ElTy, &LI);
checkAtomicMemAccessSize(ElTy, &LI);
} else {
Expand All @@ -4349,9 +4350,10 @@ void Verifier::visitStoreInst(StoreInst &SI) {
Check(SI.getOrdering() != AtomicOrdering::Acquire &&
SI.getOrdering() != AtomicOrdering::AcquireRelease,
"Store cannot have Acquire ordering", &SI);
-  Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
-        "atomic store operand must have integer, pointer, or floating point "
-        "type!",
+  Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+            ElTy->getScalarType()->isFloatingPointTy(),
+        "atomic store operand must have integer, pointer, floating point, "
+        "or vector type!",
ElTy, &SI);
checkAtomicMemAccessSize(ElTy, &SI);
} else {
11 changes: 11 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Op, MVT::f32, Promote);
}

setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);

// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
@@ -32066,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
}

TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
if (LI->getType()->getScalarType()->isFloatingPointTy())
return AtomicExpansionKind::CastToInteger;
return AtomicExpansionKind::None;
}

LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
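
To show what `CastToInteger` means here, a sketch (not from the patch) of the IR before and after AtomicExpand: the floating-point atomic load is performed on the same-width integer type and the value is bitcast back.

```llvm
; Before AtomicExpand:
define float @atomic_f32(ptr %p) {
  %v = load atomic float, ptr %p seq_cst, align 4
  ret float %v
}

; After AtomicExpand (sketch): the load happens on i32 and is cast back.
define float @atomic_f32_expanded(ptr %p) {
  %i = load atomic i32, ptr %p seq_cst, align 4
  %v = bitcast i32 %i to float
  ret float %v
}
```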
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1839,6 +1839,8 @@ namespace llvm {
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
TargetLoweringBase::AtomicExpansionKind
shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1204,6 +1204,18 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;

def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)))))),
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16>
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>

def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
(VMOVAPDrm addr:$src)>; // load atomic <2 x i64>
def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
(VMOVAPDrm addr:$src)>; // load atomic <4 x i32>

// Floating point loads/stores.
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
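
As a hedged example (assuming an AVX-capable target and sufficient alignment, not a guaranteed lowering), the kind of IR the pattern comments above refer to:

```llvm
; Intended to match the MOVDI2PDIrm pattern: a <2 x i16> load legalized to a
; 32-bit scalar atomic load whose result lands in a vector register.
define <2 x i16> @atomic_vec2_i16(ptr %p) {
  %v = load atomic <2 x i16>, ptr %p acquire, align 4
  ret <2 x i16> %v
}

; Intended to match atomic_load_128_v2i64: a single aligned 128-bit load
; selected as VMOVAPDrm.
define <2 x i64> @atomic_vec2_i64(ptr %p) {
  %v = load atomic <2 x i64>, ptr %p acquire, align 16
  ret <2 x i64> %v
}
```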
19 changes: 19 additions & 0 deletions llvm/test/Assembler/atomic.ll
@@ -52,6 +52,25 @@ define void @f(ptr %x) {
; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic
atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic

; CHECK: load atomic <1 x i32>, ptr %x unordered, align 4
load atomic <1 x i32>, ptr %x unordered, align 4
; CHECK: store atomic <1 x i32> splat (i32 3), ptr %x release, align 4
store atomic <1 x i32> <i32 3>, ptr %x release, align 4
; CHECK: load atomic <2 x i32>, ptr %x unordered, align 4
load atomic <2 x i32>, ptr %x unordered, align 4
; CHECK: store atomic <2 x i32> <i32 3, i32 4>, ptr %x release, align 4
store atomic <2 x i32> <i32 3, i32 4>, ptr %x release, align 4

; CHECK: load atomic <2 x ptr>, ptr %x unordered, align 4
load atomic <2 x ptr>, ptr %x unordered, align 4
; CHECK: store atomic <2 x ptr> zeroinitializer, ptr %x release, align 4
store atomic <2 x ptr> zeroinitializer, ptr %x release, align 4

; CHECK: load atomic <2 x float>, ptr %x unordered, align 4
load atomic <2 x float>, ptr %x unordered, align 4
; CHECK: store atomic <2 x float> <float 3.0, float 4.0>, ptr %x release, align 4
store atomic <2 x float> <float 3.0, float 4.0>, ptr %x release, align 4

; CHECK: fence syncscope("singlethread") release
fence syncscope("singlethread") release
; CHECK: fence seq_cst
51 changes: 51 additions & 0 deletions llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
store atomic double %val1, ptr %ptr seq_cst, align 8
ret void
}

define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
; ARM-LABEL: atomic_vec1_ptr:
; ARM: @ %bb.0:
; ARM-NEXT: ldr r0, [r0]
; ARM-NEXT: dmb ish
; ARM-NEXT: bx lr
;
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
; ARMOPTNONE: @ %bb.0:
; ARMOPTNONE-NEXT: ldr r0, [r0]
; ARMOPTNONE-NEXT: dmb ish
; ARMOPTNONE-NEXT: bx lr
;
; THUMBTWO-LABEL: atomic_vec1_ptr:
; THUMBTWO: @ %bb.0:
; THUMBTWO-NEXT: ldr r0, [r0]
; THUMBTWO-NEXT: dmb ish
; THUMBTWO-NEXT: bx lr
;
; THUMBONE-LABEL: atomic_vec1_ptr:
; THUMBONE: @ %bb.0:
; THUMBONE-NEXT: push {r7, lr}
; THUMBONE-NEXT: movs r1, #0
; THUMBONE-NEXT: mov r2, r1
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
; THUMBONE-NEXT: pop {r7, pc}
;
; ARMV4-LABEL: atomic_vec1_ptr:
; ARMV4: @ %bb.0:
; ARMV4-NEXT: push {r11, lr}
; ARMV4-NEXT: mov r1, #2
; ARMV4-NEXT: bl __atomic_load_4
; ARMV4-NEXT: pop {r11, lr}
; ARMV4-NEXT: mov pc, lr
;
; ARMV6-LABEL: atomic_vec1_ptr:
; ARMV6: @ %bb.0:
; ARMV6-NEXT: ldr r0, [r0]
; ARMV6-NEXT: mov r1, #0
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
; ARMV6-NEXT: bx lr
;
; THUMBM-LABEL: atomic_vec1_ptr:
; THUMBM: @ %bb.0:
; THUMBM-NEXT: ldr r0, [r0]
; THUMBM-NEXT: dmb sy
; THUMBM-NEXT: bx lr
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
ret <1 x ptr> %ret
}