Skip to content

Commit 661712f

Browse files
authored
Fix custom forward mode erasure (rust-lang#645)
* Fix custom forward mode erasure
* Fix vector reverse add
* Fix test
* Bitcast order inv
1 parent 31e0273 commit 661712f

File tree

5 files changed

+243
-16
lines changed

5 files changed

+243
-16
lines changed

enzyme/Enzyme/AdjointGenerator.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -675,23 +675,23 @@ class AdjointGenerator
675675
Type *isfloat = I.getType()->isFPOrFPVectorTy()
676676
? I.getType()->getScalarType()
677677
: nullptr;
678-
if (!isfloat && type->isIntOrIntVectorTy()) {
679-
auto LoadSize = DL.getTypeSizeInBits(type) / 8;
678+
if (!isfloat && I.getType()->isIntOrIntVectorTy()) {
679+
auto LoadSize = DL.getTypeSizeInBits(I.getType()) / 8;
680680
ConcreteType vd = BaseType::Unknown;
681681
if (!OrigOffset)
682682
vd =
683683
TR.firstPointer(LoadSize, I.getOperand(0),
684684
/*errifnotfound*/ false, /*pointerIntSame*/ true);
685685
if (vd.isKnown())
686686
isfloat = vd.isFloat();
687-
else
687+
else {
688688
isfloat =
689689
TR.intType(LoadSize, &I, /*errIfNotFound*/ !looseTypeAnalysis)
690690
.isFloat();
691+
}
691692
}
692693

693694
if (isfloat) {
694-
695695
switch (Mode) {
696696
case DerivativeMode::ForwardModeSplit:
697697
case DerivativeMode::ForwardMode: {
@@ -8273,11 +8273,10 @@ class AdjointGenerator
82738273

82748274
if (normalReturn && normalReturn != newCall) {
82758275
assert(normalReturn->getType() == newCall->getType());
8276-
assert(Mode != DerivativeMode::ReverseModeGradient);
82778276
gutils->replaceAWithB(newCall, normalReturn);
82788277
gutils->erase(newCall);
8279-
}
8280-
eraseIfUnused(*orig);
8278+
} else
8279+
eraseIfUnused(*orig);
82818280
return;
82828281
}
82838282
}

enzyme/Enzyme/Enzyme.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,6 @@ class Enzyme : public ModulePass {
613613
#endif
614614
{
615615
Value *res = CI->getArgOperand(i);
616-
617616
if (truei >= FT->getNumParams()) {
618617
if (!isa<MetadataAsValue>(res) &&
619618
(mode == DerivativeMode::ReverseModeGradient ||

enzyme/Enzyme/GradientUtils.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2405,21 +2405,25 @@ class DiffeGradientUtils : public GradientUtils {
24052405
auto rule1 = [&](Value *ptr) {
24062406
return BuilderM.CreateBitCast(
24072407
ptr, PointerType::get(
2408-
IntToFloatTy(dif->getType()),
2408+
IntToFloatTy(diffType),
24092409
cast<PointerType>(ptr->getType())->getAddressSpace()));
24102410
};
24112411

2412-
ptr = applyChainRule(diffType, BuilderM, rule1, ptr);
2412+
ptr = applyChainRule(
2413+
PointerType::get(
2414+
IntToFloatTy(diffType),
2415+
cast<PointerType>(origptr->getType())->getAddressSpace()),
2416+
BuilderM, rule1, ptr);
24132417

24142418
auto rule2 = [&](Value *dif) {
2415-
return BuilderM.CreateBitCast(dif, IntToFloatTy(dif->getType()));
2419+
return BuilderM.CreateBitCast(dif, IntToFloatTy(diffType));
24162420
};
24172421

2418-
dif = applyChainRule(diffType, BuilderM, rule2, dif);
2422+
dif = applyChainRule(IntToFloatTy(diffType), BuilderM, rule2, dif);
24192423
}
24202424
#if LLVM_VERSION_MAJOR >= 9
24212425
AtomicRMWInst::BinOp op = AtomicRMWInst::FAdd;
2422-
if (auto vt = dyn_cast<VectorType>(dif->getType())) {
2426+
if (auto vt = dyn_cast<VectorType>(diffType)) {
24232427
#if LLVM_VERSION_MAJOR >= 12
24242428
assert(!vt->getElementCount().isScalable());
24252429
size_t numElems = vt->getElementCount().getKnownMinValue();
@@ -2499,7 +2503,7 @@ class DiffeGradientUtils : public GradientUtils {
24992503
};
25002504
old = applyChainRule(diffType, BuilderM, rule, ptr);
25012505
} else {
2502-
Type *tys[] = {dif->getType(), origptr->getType()};
2506+
Type *tys[] = {diffType, origptr->getType()};
25032507
auto F = Intrinsic::getDeclaration(oldFunc->getParent(),
25042508
Intrinsic::masked_load, tys);
25052509
#if LLVM_VERSION_MAJOR >= 10
@@ -2516,7 +2520,7 @@ class DiffeGradientUtils : public GradientUtils {
25162520
Constant::getNullValue(dif->getType())};
25172521
return BuilderM.CreateCall(F, args);
25182522
};
2519-
old = applyChainRule(dif->getType(), BuilderM, rule, ip);
2523+
old = applyChainRule(diffType, BuilderM, rule, ip);
25202524
}
25212525

25222526
auto rule = [&](Value *dif, Value *old) {
@@ -2552,7 +2556,7 @@ class DiffeGradientUtils : public GradientUtils {
25522556
};
25532557
applyChainRule(BuilderM, rule, ptr, res);
25542558
} else {
2555-
Type *tys[] = {dif->getType(), origptr->getType()};
2559+
Type *tys[] = {diffType, origptr->getType()};
25562560
auto F = Intrinsic::getDeclaration(oldFunc->getParent(),
25572561
Intrinsic::masked_store, tys);
25582562
assert(align);
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
; RUN: if [ %llvmver -ge 9 ]; then %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -simplifycfg -adce -instsimplify -S | FileCheck %s; fi
2+
3+
source_filename = "text"
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
5+
target triple = "x86_64-pc-linux-gnu"
6+
7+
define void @tester(i64 addrspace(12)* %i2, i64 addrspace(13)* %i7) {
8+
entry:
9+
%i3 = load i64, i64 addrspace(12)* %i2, align 8, !dbg !5, !tbaa !15
10+
store i64 %i3, i64 addrspace(13)* %i7, align 8, !dbg !35, !tbaa !40
11+
ret void
12+
}
13+
14+
declare void @__enzyme_reverse(...)
15+
16+
define void @test_derivative(i64 addrspace(12)* %x, i64 addrspace(12)* %dx1, i64 addrspace(12)* %dx2, {} addrspace(13)* %y, {} addrspace(13)* %dy1, {} addrspace(13)* %dy2, i8* %tape) {
17+
entry:
18+
call void (...) @__enzyme_reverse(void (i64 addrspace(12)*, i64 addrspace(13)*)* nonnull @tester, metadata !"enzyme_width", i64 2, metadata !"enzyme_dup", i64 addrspace(12)* %x, i64 addrspace(12)* %dx1, i64 addrspace(12)* %dx2, metadata !"enzyme_dup", {} addrspace(13)* %y, {} addrspace(13)* %dy1, {} addrspace(13)* %dy2, i8* %tape)
19+
ret void
20+
}
21+
22+
!llvm.module.flags = !{!0, !1}
23+
!llvm.dbg.cu = !{!2}
24+
25+
!0 = !{i32 2, !"Dwarf Version", i32 4}
26+
!1 = !{i32 2, !"Debug Info Version", i32 3}
27+
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
28+
!3 = !DIFile(filename: "/mnt/Data/git/Enzyme.jl/revjac.jl", directory: ".")
29+
!4 = !{}
30+
!5 = !DILocation(line: 33, scope: !6, inlinedAt: !9)
31+
!6 = distinct !DISubprogram(name: "getproperty;", linkageName: "getproperty", scope: !7, file: !7, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
32+
!7 = !DIFile(filename: "Base.jl", directory: ".")
33+
!8 = !DISubroutineType(types: !4)
34+
!9 = distinct !DILocation(line: 56, scope: !10, inlinedAt: !12)
35+
!10 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !11, file: !11, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
36+
!11 = !DIFile(filename: "refvalue.jl", directory: ".")
37+
!12 = distinct !DILocation(line: 6, scope: !13, inlinedAt: !14)
38+
!13 = distinct !DISubprogram(name: "batchbwd", linkageName: "julia_batchbwd_1599", scope: null, file: !3, line: 5, type: !8, scopeLine: 5, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
39+
!14 = distinct !DILocation(line: 0, scope: !13)
40+
!15 = !{!16, !16, i64 0}
41+
!16 = !{!"double", !17, i64 0}
42+
!17 = !{!"jtbaa_value", !18, i64 0}
43+
!18 = !{!"jtbaa_data", !19, i64 0}
44+
!19 = !{!"jtbaa", !20, i64 0}
45+
!20 = !{!"jtbaa"}
46+
!21 = !DILocation(line: 448, scope: !22, inlinedAt: !24)
47+
!22 = distinct !DISubprogram(name: "Array;", linkageName: "Array", scope: !23, file: !23, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
48+
!23 = !DIFile(filename: "boot.jl", directory: ".")
49+
!24 = distinct !DILocation(line: 457, scope: !22, inlinedAt: !25)
50+
!25 = distinct !DILocation(line: 785, scope: !26, inlinedAt: !28)
51+
!26 = distinct !DISubprogram(name: "similar;", linkageName: "similar", scope: !27, file: !27, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
52+
!27 = !DIFile(filename: "abstractarray.jl", directory: ".")
53+
!28 = distinct !DILocation(line: 784, scope: !26, inlinedAt: !29)
54+
!29 = distinct !DILocation(line: 672, scope: !30, inlinedAt: !32)
55+
!30 = distinct !DISubprogram(name: "_array_for;", linkageName: "_array_for", scope: !31, file: !31, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
56+
!31 = !DIFile(filename: "array.jl", directory: ".")
57+
!32 = distinct !DILocation(line: 670, scope: !30, inlinedAt: !33)
58+
!33 = distinct !DILocation(line: 108, scope: !34, inlinedAt: !12)
59+
!34 = distinct !DISubprogram(name: "vect;", linkageName: "vect", scope: !31, file: !31, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
60+
!35 = !DILocation(line: 843, scope: !36, inlinedAt: !33)
61+
!36 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !31, file: !31, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
62+
!37 = !{!38, !38, i64 0}
63+
!38 = !{!"jtbaa_arrayptr", !39, i64 0}
64+
!39 = !{!"jtbaa_array", !19, i64 0}
65+
!40 = !{!41, !41, i64 0}
66+
!41 = !{!"jtbaa_arraybuf", !18, i64 0}
67+
68+
; CHECK: define internal void @diffe2tester(i64 addrspace(12)* %i2, [2 x i64 addrspace(12)*] %"i2'", i64 addrspace(13)* %i7, [2 x i64 addrspace(13)*] %"i7'", i8* %tapeArg)
69+
; CHECK-NEXT: entry:
70+
; CHECK-NEXT: tail call void @free(i8* nonnull %tapeArg)
71+
; CHECK-NEXT: %"i3'de" = alloca [2 x i64]
72+
; CHECK-NEXT: store [2 x i64] zeroinitializer, [2 x i64]* %"i3'de"
73+
; CHECK-NEXT: %0 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 0
74+
; CHECK-NEXT: %1 = load i64, i64 addrspace(13)* %0
75+
; CHECK-NEXT: %2 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 1
76+
; CHECK-NEXT: %3 = load i64, i64 addrspace(13)* %2
77+
; CHECK-NEXT: %4 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 0
78+
; CHECK-NEXT: store i64 0, i64 addrspace(13)* %4
79+
; CHECK-NEXT: %5 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 1
80+
; CHECK-NEXT: store i64 0, i64 addrspace(13)* %5
81+
; CHECK-NEXT: %6 = getelementptr inbounds [2 x i64], [2 x i64]* %"i3'de", i32 0, i32 0
82+
; CHECK-NEXT: %7 = load i64, i64* %6
83+
; CHECK-NEXT: %8 = bitcast i64 %7 to double
84+
; CHECK-NEXT: %9 = bitcast i64 %1 to double
85+
; CHECK-NEXT: %10 = fadd fast double %8, %9
86+
; CHECK-NEXT: %11 = bitcast double %10 to i64
87+
; CHECK-NEXT: store i64 %11, i64* %6
88+
; CHECK-NEXT: %12 = getelementptr inbounds [2 x i64], [2 x i64]* %"i3'de", i32 0, i32 1
89+
; CHECK-NEXT: %13 = load i64, i64* %12
90+
; CHECK-NEXT: %14 = bitcast i64 %13 to double
91+
; CHECK-NEXT: %15 = bitcast i64 %3 to double
92+
; CHECK-NEXT: %16 = fadd fast double %14, %15
93+
; CHECK-NEXT: %17 = bitcast double %16 to i64
94+
; CHECK-NEXT: store i64 %17, i64* %12
95+
; CHECK-NEXT: %18 = load [2 x i64], [2 x i64]* %"i3'de"
96+
; CHECK-NEXT: store [2 x i64] zeroinitializer, [2 x i64]* %"i3'de"
97+
; CHECK-NEXT: %19 = extractvalue [2 x i64 addrspace(12)*] %"i2'", 0
98+
; CHECK-NEXT: %20 = load i64, i64 addrspace(12)* %19
99+
; CHECK-NEXT: %21 = extractvalue [2 x i64 addrspace(12)*] %"i2'", 1
100+
; CHECK-NEXT: %22 = load i64, i64 addrspace(12)* %21
101+
; CHECK-NEXT: %23 = extractvalue [2 x i64] %18, 0
102+
; CHECK-DAG: %[[i24:.+]] = bitcast i64 %23 to double
103+
; CHECK-DAG: %[[i25:.+]] = bitcast i64 %20 to double
104+
; CHECK-NEXT: %26 = fadd fast double %[[i25]], %[[i24]]
105+
; CHECK-NEXT: %27 = bitcast double %26 to i64
106+
; CHECK-NEXT: %28 = extractvalue [2 x i64] %18, 1
107+
; CHECK-DAG: %[[i29:.+]] = bitcast i64 %28 to double
108+
; CHECK-DAG: %[[i30:.+]] = bitcast i64 %22 to double
109+
; CHECK-NEXT: %31 = fadd fast double %[[i30]], %[[i29]]
110+
; CHECK-NEXT: %32 = bitcast double %31 to i64
111+
; CHECK-NEXT: %33 = extractvalue [2 x i64 addrspace(12)*] %"i2'", 0
112+
; CHECK-NEXT: store i64 %27, i64 addrspace(12)* %33
113+
; CHECK-NEXT: %34 = extractvalue [2 x i64 addrspace(12)*] %"i2'", 1
114+
; CHECK-NEXT: store i64 %32, i64 addrspace(12)* %34
115+
; CHECK-NEXT: ret void
116+
; CHECK-NEXT: }
117+
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
; RUN: if [ %llvmver -ge 9 ]; then %opt < %s %loadEnzyme -enzyme -enzyme-preopt=false -mem2reg -simplifycfg -adce -instsimplify -S | FileCheck %s; fi
2+
3+
source_filename = "text"
4+
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
define void @tester(i64 addrspace(12)* %i2, i64 addrspace(13)* %i7) {
8+
entry:
9+
%i3 = load i64, i64 addrspace(12)* %i2, align 8, !dbg !5, !tbaa !15
10+
store i64 %i3, i64 addrspace(13)* %i7, align 8, !dbg !35, !tbaa !40
11+
ret void
12+
}
13+
14+
declare void @__enzyme_reverse(...)
15+
16+
define void @test_derivative(i64 addrspace(12)* %x, i64 addrspace(12)* %dx1, i64 addrspace(12)* %dx2, {} addrspace(13)* %y, {} addrspace(13)* %dy1, {} addrspace(13)* %dy2, i8* %tape) {
17+
entry:
18+
call void (...) @__enzyme_reverse(void (i64 addrspace(12)*, i64 addrspace(13)*)* nonnull @tester, metadata !"enzyme_width", i64 2, metadata !"enzyme_dup", i64 addrspace(12)* %x, i64 addrspace(12)* %dx1, i64 addrspace(12)* %dx2, metadata !"enzyme_dup", {} addrspace(13)* %y, {} addrspace(13)* %dy1, {} addrspace(13)* %dy2, i8* %tape)
19+
ret void
20+
}
21+
22+
!llvm.module.flags = !{!0, !1}
23+
!llvm.dbg.cu = !{!2}
24+
25+
!0 = !{i32 2, !"Dwarf Version", i32 4}
26+
!1 = !{i32 2, !"Debug Info Version", i32 3}
27+
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !4, nameTableKind: None)
28+
!3 = !DIFile(filename: "/mnt/Data/git/Enzyme.jl/revjac.jl", directory: ".")
29+
!4 = !{}
30+
!5 = !DILocation(line: 33, scope: !6, inlinedAt: !9)
31+
!6 = distinct !DISubprogram(name: "getproperty;", linkageName: "getproperty", scope: !7, file: !7, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
32+
!7 = !DIFile(filename: "Base.jl", directory: ".")
33+
!8 = !DISubroutineType(types: !4)
34+
!9 = distinct !DILocation(line: 56, scope: !10, inlinedAt: !12)
35+
!10 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !11, file: !11, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
36+
!11 = !DIFile(filename: "refvalue.jl", directory: ".")
37+
!12 = distinct !DILocation(line: 6, scope: !13, inlinedAt: !14)
38+
!13 = distinct !DISubprogram(name: "batchbwd", linkageName: "julia_batchbwd_1599", scope: null, file: !3, line: 5, type: !8, scopeLine: 5, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
39+
!14 = distinct !DILocation(line: 0, scope: !13)
40+
!15 = !{!16, !16, i64 0}
41+
!16 = !{!"double", !17, i64 0}
42+
!17 = !{!"jtbaa_value", !18, i64 0}
43+
!18 = !{!"jtbaa_data", !19, i64 0}
44+
!19 = !{!"jtbaa", !20, i64 0}
45+
!20 = !{!"jtbaa"}
46+
!21 = !DILocation(line: 448, scope: !22, inlinedAt: !24)
47+
!22 = distinct !DISubprogram(name: "Array;", linkageName: "Array", scope: !23, file: !23, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
48+
!23 = !DIFile(filename: "boot.jl", directory: ".")
49+
!24 = distinct !DILocation(line: 457, scope: !22, inlinedAt: !25)
50+
!25 = distinct !DILocation(line: 785, scope: !26, inlinedAt: !28)
51+
!26 = distinct !DISubprogram(name: "similar;", linkageName: "similar", scope: !27, file: !27, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
52+
!27 = !DIFile(filename: "abstractarray.jl", directory: ".")
53+
!28 = distinct !DILocation(line: 784, scope: !26, inlinedAt: !29)
54+
!29 = distinct !DILocation(line: 672, scope: !30, inlinedAt: !32)
55+
!30 = distinct !DISubprogram(name: "_array_for;", linkageName: "_array_for", scope: !31, file: !31, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
56+
!31 = !DIFile(filename: "array.jl", directory: ".")
57+
!32 = distinct !DILocation(line: 670, scope: !30, inlinedAt: !33)
58+
!33 = distinct !DILocation(line: 108, scope: !34, inlinedAt: !12)
59+
!34 = distinct !DISubprogram(name: "vect;", linkageName: "vect", scope: !31, file: !31, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
60+
!35 = !DILocation(line: 843, scope: !36, inlinedAt: !33)
61+
!36 = distinct !DISubprogram(name: "setindex!;", linkageName: "setindex!", scope: !31, file: !31, type: !8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
62+
!37 = !{!38, !38, i64 0}
63+
!38 = !{!"jtbaa_arrayptr", !39, i64 0}
64+
!39 = !{!"jtbaa_array", !19, i64 0}
65+
!40 = !{!41, !41, i64 0}
66+
!41 = !{!"jtbaa_arraybuf", !18, i64 0}
67+
68+
; CHECK: define internal void @diffe2tester(i64 addrspace(12)* %i2, [2 x i64 addrspace(12)*] %"i2'", i64 addrspace(13)* %i7, [2 x i64 addrspace(13)*] %"i7'", i8* %tapeArg)
69+
; CHECK-NEXT: entry:
70+
; CHECK-NEXT: tail call void @free(i8* nonnull %tapeArg)
71+
; CHECK-NEXT: %"i3'de" = alloca [2 x i64]
72+
; CHECK-NEXT: store [2 x i64] zeroinitializer, [2 x i64]* %"i3'de"
73+
; CHECK-NEXT: %0 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 0
74+
; CHECK-NEXT: %1 = load i64, i64 addrspace(13)* %0
75+
; CHECK-NEXT: %2 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 1
76+
; CHECK-NEXT: %3 = load i64, i64 addrspace(13)* %2
77+
; CHECK-NEXT: %4 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 0
78+
; CHECK-NEXT: store i64 0, i64 addrspace(13)* %4
79+
; CHECK-NEXT: %5 = extractvalue [2 x i64 addrspace(13)*] %"i7'", 1
80+
; CHECK-NEXT: store i64 0, i64 addrspace(13)* %5
81+
; CHECK-NEXT: %6 = getelementptr inbounds [2 x i64], [2 x i64]* %"i3'de", i32 0, i32 0
82+
; CHECK-NEXT: %7 = load i64, i64* %6
83+
; CHECK-NEXT: %8 = bitcast i64 %7 to double
84+
; CHECK-NEXT: %9 = bitcast i64 %1 to double
85+
; CHECK-NEXT: %10 = fadd fast double %8, %9
86+
; CHECK-NEXT: %11 = bitcast double %10 to i64
87+
; CHECK-NEXT: store i64 %11, i64* %6
88+
; CHECK-NEXT: %12 = getelementptr inbounds [2 x i64], [2 x i64]* %"i3'de", i32 0, i32 1
89+
; CHECK-NEXT: %13 = load i64, i64* %12
90+
; CHECK-NEXT: %14 = bitcast i64 %13 to double
91+
; CHECK-NEXT: %15 = bitcast i64 %3 to double
92+
; CHECK-NEXT: %16 = fadd fast double %14, %15
93+
; CHECK-NEXT: %17 = bitcast double %16 to i64
94+
; CHECK-NEXT: store i64 %17, i64* %12
95+
; CHECK-NEXT: %18 = load [2 x i64], [2 x i64]* %"i3'de"
96+
; CHECK-NEXT: store [2 x i64] zeroinitializer, [2 x i64]* %"i3'de"
97+
; CHECK-NEXT: %19 = extractvalue [2 x i64 addrspace(12)*] %"i2'", 0
98+
; CHECK-NEXT: %20 = bitcast i64 addrspace(12)* %19 to double addrspace(12)*
99+
; CHECK-NEXT: %21 = extractvalue [2 x i64 addrspace(12)*] %"i2'", 1
100+
; CHECK-NEXT: %22 = bitcast i64 addrspace(12)* %21 to double addrspace(12)*
101+
; CHECK-NEXT: %23 = extractvalue [2 x i64] %18, 0
102+
; CHECK-NEXT: %24 = bitcast i64 %23 to double
103+
; CHECK-NEXT: %25 = extractvalue [2 x i64] %18, 1
104+
; CHECK-NEXT: %26 = bitcast i64 %25 to double
105+
; CHECK-NEXT: %27 = atomicrmw fadd double addrspace(12)* %20, double %24 monotonic
106+
; CHECK-NEXT: %28 = atomicrmw fadd double addrspace(12)* %22, double %26 monotonic
107+
; CHECK-NEXT: ret void
108+
; CHECK-NEXT: }

0 commit comments

Comments
 (0)