Skip to content

Commit c97a107

Browse files
committed
[IR][TBAA] Allow multiple fields with same offset in TBAA struct-path
Add support for multiple fields having the same offset in TBAA struct-path metadata nodes. The primary goal is to allow union-like structures to participate in TBAA struct-path resolution.
1 parent 923f039 commit c97a107

File tree

5 files changed

+162
-66
lines changed

5 files changed

+162
-66
lines changed

llvm/include/llvm/IR/Verifier.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,15 @@ class TBAAVerifier {
5959

6060
/// \name Helper functions used by \c visitTBAAMetadata.
6161
/// @{
62-
MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode,
63-
APInt &Offset, bool IsNewFormat);
62+
std::vector<MDNode *> getFieldNodeFromTBAABaseNode(Instruction &I,
63+
const MDNode *BaseNode,
64+
APInt &Offset,
65+
bool IsNewFormat);
66+
bool findAccessTypeNode(Instruction &I,
67+
SmallPtrSetImpl<const MDNode *> &StructPath,
68+
APInt Offset, bool IsNewFormat,
69+
const MDNode *AccessType, const MDNode *BaseNode,
70+
const MDNode *MD);
6471
TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I,
6572
const MDNode *BaseNode,
6673
bool IsNewFormat);

llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
#include "llvm/Support/ErrorHandling.h"
122122
#include <cassert>
123123
#include <cstdint>
124+
#include <stack>
124125

125126
using namespace llvm;
126127

@@ -299,21 +300,22 @@ class TBAAStructTypeNode {
299300
return TBAAStructTypeNode(TypeNode);
300301
}
301302

302-
/// Get this TBAAStructTypeNode's field in the type DAG with
303+
/// Get this TBAAStructTypeNode's fields in the type DAG with
303304
/// given offset. Update the offset to be relative to the field type.
304-
TBAAStructTypeNode getField(uint64_t &Offset) const {
305+
/// There could be multiple fields with same offset.
306+
std::vector<TBAAStructTypeNode> getField(uint64_t &Offset) const {
305307
bool NewFormat = isNewFormat();
306308
const ArrayRef<MDOperand> Operands = Node->operands();
307309
const unsigned NumOperands = Operands.size();
308310

309311
if (NewFormat) {
310312
// New-format root and scalar type nodes have no fields.
311313
if (NumOperands < 6)
312-
return TBAAStructTypeNode();
314+
return {TBAAStructTypeNode()};
313315
} else {
314316
// Parent can be omitted for the root node.
315317
if (NumOperands < 2)
316-
return TBAAStructTypeNode();
318+
return {TBAAStructTypeNode()};
317319

318320
// Fast path for a scalar type node and a struct type node with a single
319321
// field.
@@ -325,8 +327,8 @@ class TBAAStructTypeNode {
325327
Offset -= Cur;
326328
MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
327329
if (!P)
328-
return TBAAStructTypeNode();
329-
return TBAAStructTypeNode(P);
330+
return {TBAAStructTypeNode()};
331+
return {TBAAStructTypeNode(P)};
330332
}
331333
}
332334

@@ -336,6 +338,8 @@ class TBAAStructTypeNode {
336338
unsigned NumOpsPerField = NewFormat ? 3 : 2;
337339
unsigned TheIdx = 0;
338340

341+
std::vector<TBAAStructTypeNode> Ret;
342+
339343
for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
340344
Idx += NumOpsPerField) {
341345
uint64_t Cur =
@@ -353,10 +357,20 @@ class TBAAStructTypeNode {
353357
uint64_t Cur =
354358
mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
355359
Offset -= Cur;
360+
361+
// Collect all fields that have right offset.
356362
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
357-
if (!P)
358-
return TBAAStructTypeNode();
359-
return TBAAStructTypeNode(P);
363+
Ret.emplace_back(P ? TBAAStructTypeNode(P) : TBAAStructTypeNode());
364+
365+
while (TheIdx > FirstFieldOpNo) {
366+
TheIdx -= NumOpsPerField;
367+
auto Val = mdconst::extract<ConstantInt>(Operands[TheIdx + 1]);
368+
if (Cur != Val->getZExtValue())
369+
break;
370+
MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
371+
P ? Ret.emplace_back(P) : Ret.emplace_back();
372+
}
373+
return Ret;
360374
}
361375
};
362376

@@ -599,11 +613,19 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
599613
// from the base type, follow the edge with the correct offset in the type DAG
600614
// and adjust the offset until we reach the field type or until we reach the
601615
// access type.
616+
// If multiple fields have same offset in some base type, then scan each such
617+
// field.
602618
bool NewFormat = BaseTag.isNewFormat();
603619
TBAAStructTypeNode BaseType(BaseTag.getBaseType());
604620
uint64_t OffsetInBase = BaseTag.getOffset();
605621

622+
SmallVector<std::pair<TBAAStructTypeNode, uint64_t>, 4> ToCheck;
623+
ToCheck.emplace_back(BaseType, OffsetInBase);
606624
for (;;) {
625+
assert(!ToCheck.empty() && "check list should not be empty");
626+
std::tie(BaseType, OffsetInBase) = ToCheck.back();
627+
ToCheck.pop_back();
628+
607629
// In the old format there is no distinction between fields and parent
608630
// types, so in this case we consider all nodes up to the root.
609631
if (!BaseType.getNode()) {
@@ -627,7 +649,9 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
627649

628650
// Follow the edge with the correct offset. Offset will be adjusted to
629651
// be relative to the field type.
630-
BaseType = BaseType.getField(OffsetInBase);
652+
for (auto &&F : BaseType.getField(OffsetInBase)) {
653+
ToCheck.emplace_back(F, OffsetInBase);
654+
}
631655
}
632656

633657
// If the base object has a direct or indirect field of the subobject's type,

llvm/lib/IR/Verifier.cpp

Lines changed: 94 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6945,48 +6945,56 @@ bool TBAAVerifier::isValidScalarTBAANode(const MDNode *MD) {
69456945
return Result;
69466946
}
69476947

6948-
/// Returns the field node at the offset \p Offset in \p BaseNode. Update \p
6949-
/// Offset in place to be the offset within the field node returned.
6948+
/// Returns one or several field nodes at the offset \p Offset in \p BaseNode.
6949+
/// Returns empty vector if \p BaseNode has no fields with specified offset.
6950+
/// Update \p Offset in place to be the offset within the field node returned.
69506951
///
69516952
/// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode.
6952-
MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I,
6953-
const MDNode *BaseNode,
6954-
APInt &Offset,
6955-
bool IsNewFormat) {
6953+
std::vector<MDNode *> TBAAVerifier::getFieldNodeFromTBAABaseNode(
6954+
Instruction &I, const MDNode *BaseNode, APInt &Offset, bool IsNewFormat) {
69566955
assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!");
69576956

69586957
// Scalar nodes have only one possible "field" -- their parent in the access
69596958
// hierarchy. Offset must be zero at this point, but our caller is supposed
69606959
// to check that.
69616960
if (BaseNode->getNumOperands() == 2)
6962-
return cast<MDNode>(BaseNode->getOperand(1));
6961+
return {cast<MDNode>(BaseNode->getOperand(1))};
69636962

69646963
unsigned FirstFieldOpNo = IsNewFormat ? 3 : 1;
69656964
unsigned NumOpsPerField = IsNewFormat ? 3 : 2;
6965+
6966+
unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
69666967
for (unsigned Idx = FirstFieldOpNo; Idx < BaseNode->getNumOperands();
69676968
Idx += NumOpsPerField) {
69686969
auto *OffsetEntryCI =
69696970
mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1));
69706971
if (OffsetEntryCI->getValue().ugt(Offset)) {
69716972
if (Idx == FirstFieldOpNo) {
6972-
CheckFailed("Could not find TBAA parent in struct type node", &I,
6973-
BaseNode, &Offset);
6974-
return nullptr;
6973+
return {};
69756974
}
69766975

6977-
unsigned PrevIdx = Idx - NumOpsPerField;
6978-
auto *PrevOffsetEntryCI =
6979-
mdconst::extract<ConstantInt>(BaseNode->getOperand(PrevIdx + 1));
6980-
Offset -= PrevOffsetEntryCI->getValue();
6981-
return cast<MDNode>(BaseNode->getOperand(PrevIdx));
6976+
LastIdx = Idx - NumOpsPerField;
6977+
break;
69826978
}
69836979
}
69846980

6985-
unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
69866981
auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>(
69876982
BaseNode->getOperand(LastIdx + 1));
6988-
Offset -= LastOffsetEntryCI->getValue();
6989-
return cast<MDNode>(BaseNode->getOperand(LastIdx));
6983+
auto LastOffsetVal = LastOffsetEntryCI->getValue();
6984+
Offset -= LastOffsetVal;
6985+
6986+
std::vector<MDNode *> Ret;
6987+
Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
6988+
while (LastIdx > FirstFieldOpNo) {
6989+
LastIdx -= NumOpsPerField;
6990+
LastOffsetEntryCI =
6991+
mdconst::extract<ConstantInt>(BaseNode->getOperand(LastIdx + 1));
6992+
if (LastOffsetEntryCI->getValue() != LastOffsetVal)
6993+
break;
6994+
Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
6995+
}
6996+
6997+
return Ret;
69906998
}
69916999

69927000
static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
@@ -7063,47 +7071,84 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
70637071
CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
70647072

70657073
APInt Offset = OffsetCI->getValue();
7066-
bool SeenAccessTypeInPath = false;
70677074

7068-
SmallPtrSet<MDNode *, 4> StructPath;
7075+
SmallPtrSet<const MDNode *, 4> StructPath;
70697076

7070-
for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
7071-
BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset,
7072-
IsNewFormat)) {
7073-
if (!StructPath.insert(BaseNode).second) {
7074-
CheckFailed("Cycle detected in struct path", &I, MD);
7075-
return false;
7076-
}
7077+
auto &&[Invalid, BaseNodeBitWidth] =
7078+
verifyTBAABaseNode(I, BaseNode, IsNewFormat);
70777079

7078-
bool Invalid;
7079-
unsigned BaseNodeBitWidth;
7080-
std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode,
7081-
IsNewFormat);
7080+
// If the base node is invalid in itself, then we've already printed all the
7081+
// errors we wanted to print.
7082+
if (Invalid)
7083+
return false;
70827084

7083-
// If the base node is invalid in itself, then we've already printed all the
7084-
// errors we wanted to print.
7085-
if (Invalid)
7086-
return false;
7085+
bool SeenAccessTypeInPath = BaseNode == AccessType;
7086+
if (SeenAccessTypeInPath) {
7087+
CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access", &I,
7088+
MD, &Offset);
7089+
if (IsNewFormat)
7090+
return true;
7091+
}
70877092

7088-
SeenAccessTypeInPath |= BaseNode == AccessType;
7093+
CheckTBAA(findAccessTypeNode(I, StructPath, Offset, IsNewFormat, AccessType,
7094+
BaseNode, MD) ||
7095+
SeenAccessTypeInPath,
7096+
"Did not see access type in access path!", &I, MD);
7097+
return true;
7098+
}
70897099

7090-
if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
7091-
CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access",
7092-
&I, MD, &Offset);
7100+
bool TBAAVerifier::findAccessTypeNode(
7101+
Instruction &I, SmallPtrSetImpl<const MDNode *> &StructPath, APInt Offset,
7102+
bool IsNewFormat, const MDNode *AccessType, const MDNode *BaseNode,
7103+
const MDNode *MD) {
7104+
if (!BaseNode || IsRootTBAANode(BaseNode))
7105+
return false;
70937106

7094-
CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
7095-
(BaseNodeBitWidth == 0 && Offset == 0) ||
7096-
(IsNewFormat && BaseNodeBitWidth == ~0u),
7097-
"Access bit-width not the same as description bit-width", &I, MD,
7098-
BaseNodeBitWidth, Offset.getBitWidth());
7107+
auto &&[Invalid, BaseNodeBitWidth] =
7108+
verifyTBAABaseNode(I, BaseNode, IsNewFormat);
70997109

7100-
if (IsNewFormat && SeenAccessTypeInPath)
7101-
break;
7110+
// If the base node is invalid in itself, then we've already printed all the
7111+
// errors we wanted to print.
7112+
if (Invalid)
7113+
return false;
7114+
7115+
// Offset at point of scalar access must be zero. Skip mismatched nodes.
7116+
if ((isValidScalarTBAANode(BaseNode) || BaseNode == AccessType) &&
7117+
Offset != 0)
7118+
return false;
7119+
7120+
CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
7121+
(BaseNodeBitWidth == 0 && Offset == 0) ||
7122+
(IsNewFormat && BaseNodeBitWidth == ~0u),
7123+
"Access bit-width not the same as description bit-width", &I, MD,
7124+
BaseNodeBitWidth, Offset.getBitWidth());
7125+
7126+
bool SeenAccessTypeInPath = (BaseNode == AccessType && Offset == 0);
7127+
7128+
if (IsNewFormat && SeenAccessTypeInPath)
7129+
return true;
7130+
7131+
auto ProbableNodes =
7132+
getFieldNodeFromTBAABaseNode(I, BaseNode, Offset, IsNewFormat);
7133+
7134+
if (!StructPath.insert(BaseNode).second) {
7135+
CheckFailed("Cycle detected in struct path", &I, MD);
7136+
return false;
71027137
}
71037138

7104-
CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
7105-
MD);
7106-
return true;
7139+
for (auto *PN : ProbableNodes) {
7140+
if (!PN || IsRootTBAANode(PN))
7141+
continue;
7142+
7143+
SmallPtrSet<const MDNode *, 4> StructPathCopy;
7144+
StructPathCopy.insert(StructPath.begin(), StructPath.end());
7145+
7146+
if (findAccessTypeNode(I, StructPathCopy, Offset, IsNewFormat, AccessType,
7147+
PN, MD))
7148+
return true;
7149+
}
7150+
7151+
return SeenAccessTypeInPath;
71077152
}
71087153

71097154
char VerifierLegacyPass::ID = 0;

llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,22 @@ entry:
105105
ret i32 %0
106106
}
107107

108+
; C vs. D => MayAlias.
109+
define i32 @f7(ptr %c, ptr %d) {
110+
entry:
111+
; CHECK-LABEL: f7
112+
; CHECK: MayAlias: store i16 7, {{.*}} <-> store i32 5,
113+
; OPT-LABEL: f7
114+
; OPT: store i32 5,
115+
; OPT: store i16 7,
116+
; OPT: load i32
117+
; OPT: ret i32
118+
store i32 5, ptr %c, align 4, !tbaa !18 ; TAG_Union_int
119+
store i16 7, ptr %d, align 4, !tbaa !17 ; TAG_Union_short
120+
%0 = load i32, ptr %c, align 4, !tbaa !18 ; TAG_Union_int
121+
ret i32 %0
122+
}
123+
108124
!0 = !{!"root"}
109125
!1 = !{!0, i64 1, !"char"}
110126
!2 = !{!1, i64 4, !"int"}
@@ -128,3 +144,7 @@ entry:
128144

129145
!14 = !{!4, i64 2, !"D", !11, i64 0, i64 2}
130146
!15 = !{!14, !14, i64 0, i64 2} ; TAG_D
147+
148+
!16 = !{!1, i64 2, !"Union", !11, i64 0, i64 2, !2, i64 0, i64 4}
149+
!17 = !{!16, !11, i64 0, i64 2} ; TAG_Union_short
150+
!18 = !{!16, !2, i64 0, i64 4} ; TAG_Union_int

llvm/test/Verifier/tbaa.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,15 @@ define void @f_1(ptr %ptr) {
6161
; CHECK: Cycle detected in struct path
6262
; CHECK-NEXT: store i32 0, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
6363

64-
; CHECK: Offset not zero at the point of scalar access
64+
; CHECK: Did not see access type in access path
65+
; CHECK-NEXT: store i32 0, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
66+
67+
; CHECK: Did not see access type in access path
6568
; CHECK-NEXT: store i32 1, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
6669

67-
; CHECK: Offset not zero at the point of scalar access
70+
; CHECK: Did not see access type in access path
6871
; CHECK-NEXT: store i32 2, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
6972

70-
; CHECK: Could not find TBAA parent in struct type node
71-
; CHECK-NEXT: store i32 3, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
72-
7373
; CHECK: Did not see access type in access path!
7474
; CHECK-NEXT: store i32 3, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
7575

0 commit comments

Comments
 (0)