
Commit 34fa965

Move to DAGCombiner
1 parent 04366fa commit 34fa965

11 files changed (+1070, -145 lines)

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 70 additions & 0 deletions
@@ -71,6 +71,7 @@
 #include <functional>
 #include <iterator>
 #include <optional>
+#include <queue>
 #include <string>
 #include <tuple>
 #include <utility>
@@ -22451,12 +22452,81 @@ SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
   return SDValue();
 }
 
+static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
+                                 const SDLoc &Dl) {
+  using namespace llvm::SDPatternMatch;
+
+  if (!Store->isSimple() || Store->isTruncatingStore())
+    return SDValue();
+
+  SDValue StoredVal = Store->getValue();
+  SDValue StorePtr = Store->getBasePtr();
+  SDValue StoreOffset = Store->getOffset();
+  EVT VT = Store->getMemoryVT();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  if (!TLI.isTypeLegal(VT) || !TLI.isOperationLegalOrCustom(ISD::MSTORE, VT))
+    return SDValue();
+
+  SDValue Mask, TrueVec, LoadCh;
+  if (!sd_match(StoredVal,
+                m_VSelect(m_Value(Mask), m_Value(TrueVec),
+                          m_Load(m_Value(LoadCh), m_Specific(StorePtr),
+                                 m_Specific(StoreOffset)))))
+    return SDValue();
+
+  LoadSDNode *Load = cast<LoadSDNode>(StoredVal.getOperand(2));
+  if (!Load->isSimple())
+    return SDValue();
+
+  auto IsSafeToFold = [](StoreSDNode *Store, LoadSDNode *Load) {
+    std::queue<SDValue> Worklist;
+
+    Worklist.push(Store->getChain());
+
+    while (!Worklist.empty()) {
+      SDValue Chain = Worklist.front();
+      Worklist.pop();
+
+      SDNode *Node = Chain.getNode();
+      if (!Node)
+        return false;
+
+      if (Node == Load)
+        return true;
+
+      if (const auto *MemNode = dyn_cast<MemSDNode>(Node))
+        if (!MemNode->isSimple() || MemNode->writeMem())
+          return false;
+
+      if (Node->getOpcode() == ISD::TokenFactor) {
+        for (unsigned i = 0; i < Node->getNumOperands(); ++i)
+          Worklist.push(Node->getOperand(i));
+      } else {
+        Worklist.push(Node->getOperand(0));
+      }
+    }
+
+    return false;
+  };
+
+  if (!IsSafeToFold(Store, Load))
+    return SDValue();
+
+  return DAG.getMaskedStore(Store->getChain(), Dl, TrueVec, StorePtr,
+                            StoreOffset, Mask, VT, Store->getMemOperand(),
+                            Store->getAddressingMode());
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   StoreSDNode *ST = cast<StoreSDNode>(N);
   SDValue Chain = ST->getChain();
   SDValue Value = ST->getValue();
   SDValue Ptr = ST->getBasePtr();
 
+  if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
+    return MaskedStore;
+
   // If this is a store of a bit convert, store the input value if the
   // resultant store does not need a higher alignment than the original.
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
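In effect, the combine above rewrites a plain vector store whose value is a vselect between new data and a load from the same address (with nothing on the chain in between that could write memory) into a single ISD::MSTORE node, and it only fires when the target marks MSTORE legal or custom for the type. A rough IR-level sketch of the pattern and its masked equivalent (the function and value names below are illustrative, not part of this commit):

define void @blend_store(<8 x i32> %x, ptr %p, <8 x i1> %m) {
  ; Load the current contents, blend in %x under %m, and store the result back.
  %old = load <8 x i32>, ptr %p, align 32
  %blend = select <8 x i1> %m, <8 x i32> %x, <8 x i32> %old
  store <8 x i32> %blend, ptr %p, align 32
  ret void
}

; After the fold, the store behaves like the masked-store intrinsic, i.e. only
; the lanes enabled by %m are written:
;   call void @llvm.masked.store.v8i32.p0(<8 x i32> %x, ptr %p, i32 32, <8 x i1> %m)

In the new test below, the NEON-only RUN line keeps the old blend-and-store lowering (bif + stp), while the SVE RUN line shows the fold firing as predicated st1w stores.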

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 6 additions & 0 deletions
@@ -691,6 +691,12 @@ void TargetLoweringBase::initActions() {
       setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT, MemVT,
                              Expand);
 
+  for (MVT VT : MVT::all_valuetypes()) {
+    if (VT == MVT::Other)
+      continue;
+    setOperationAction(ISD::MSTORE, VT, Expand);
+  }
+
   // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to
   // remove this and targets should individually set these types if not legal.
   for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
(new AArch64 test file; path not shown in this view)

Lines changed: 282 additions & 0 deletions
@@ -0,0 +1,282 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64-- -mattr=+neon | FileCheck %s -check-prefix=AARCH64
; RUN: llc < %s -mtriple=aarch64-- -mattr=+sve | FileCheck %s -check-prefix=SVE

define void @test_masked_store_success(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
; AARCH64-LABEL: test_masked_store_success:
; AARCH64: // %bb.0:
; AARCH64-NEXT: zip1 v3.8b, v2.8b, v0.8b
; AARCH64-NEXT: zip2 v2.8b, v2.8b, v0.8b
; AARCH64-NEXT: ldp q4, q5, [x0]
; AARCH64-NEXT: ushll v3.4s, v3.4h, #0
; AARCH64-NEXT: ushll v2.4s, v2.4h, #0
; AARCH64-NEXT: shl v3.4s, v3.4s, #31
; AARCH64-NEXT: shl v2.4s, v2.4s, #31
; AARCH64-NEXT: cmlt v3.4s, v3.4s, #0
; AARCH64-NEXT: cmlt v2.4s, v2.4s, #0
; AARCH64-NEXT: bif v0.16b, v4.16b, v3.16b
; AARCH64-NEXT: bif v1.16b, v5.16b, v2.16b
; AARCH64-NEXT: stp q0, q1, [x0]
; AARCH64-NEXT: ret
;
; SVE-LABEL: test_masked_store_success:
; SVE: // %bb.0:
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: zip2 v3.8b, v2.8b, v0.8b
; SVE-NEXT: zip1 v2.8b, v2.8b, v0.8b
; SVE-NEXT: mov x8, #4 // =0x4
; SVE-NEXT: ptrue p0.s, vl4
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; SVE-NEXT: ushll v3.4s, v3.4h, #0
; SVE-NEXT: ushll v2.4s, v2.4h, #0
; SVE-NEXT: shl v3.4s, v3.4s, #31
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmlt v3.4s, v3.4s, #0
; SVE-NEXT: cmlt v2.4s, v2.4s, #0
; SVE-NEXT: cmpne p1.s, p0/z, z3.s, #0
; SVE-NEXT: cmpne p0.s, p0/z, z2.s, #0
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
; SVE-NEXT: st1w { z0.s }, p0, [x0]
; SVE-NEXT: ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_volatile_load(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
; AARCH64-LABEL: test_masked_store_volatile_load:
; AARCH64: // %bb.0:
; AARCH64-NEXT: zip1 v3.8b, v2.8b, v0.8b
; AARCH64-NEXT: zip2 v2.8b, v2.8b, v0.8b
; AARCH64-NEXT: ldr q4, [x0]
; AARCH64-NEXT: ldr q5, [x0, #16]
; AARCH64-NEXT: ushll v3.4s, v3.4h, #0
; AARCH64-NEXT: ushll v2.4s, v2.4h, #0
; AARCH64-NEXT: shl v3.4s, v3.4s, #31
; AARCH64-NEXT: shl v2.4s, v2.4s, #31
; AARCH64-NEXT: cmlt v3.4s, v3.4s, #0
; AARCH64-NEXT: cmlt v2.4s, v2.4s, #0
; AARCH64-NEXT: bif v0.16b, v4.16b, v3.16b
; AARCH64-NEXT: bif v1.16b, v5.16b, v2.16b
; AARCH64-NEXT: stp q0, q1, [x0]
; AARCH64-NEXT: ret
;
; SVE-LABEL: test_masked_store_volatile_load:
; SVE: // %bb.0:
; SVE-NEXT: zip1 v3.8b, v2.8b, v0.8b
; SVE-NEXT: zip2 v2.8b, v2.8b, v0.8b
; SVE-NEXT: ldr q4, [x0]
; SVE-NEXT: ldr q5, [x0, #16]
; SVE-NEXT: ushll v3.4s, v3.4h, #0
; SVE-NEXT: ushll v2.4s, v2.4h, #0
; SVE-NEXT: shl v3.4s, v3.4s, #31
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmlt v3.4s, v3.4s, #0
; SVE-NEXT: cmlt v2.4s, v2.4s, #0
; SVE-NEXT: bif v0.16b, v4.16b, v3.16b
; SVE-NEXT: bif v1.16b, v5.16b, v2.16b
; SVE-NEXT: stp q0, q1, [x0]
; SVE-NEXT: ret
  %load = load volatile <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_volatile_store(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
; AARCH64-LABEL: test_masked_store_volatile_store:
; AARCH64: // %bb.0:
; AARCH64-NEXT: zip1 v3.8b, v2.8b, v0.8b
; AARCH64-NEXT: zip2 v2.8b, v2.8b, v0.8b
; AARCH64-NEXT: ldp q4, q5, [x0]
; AARCH64-NEXT: ushll v3.4s, v3.4h, #0
; AARCH64-NEXT: ushll v2.4s, v2.4h, #0
; AARCH64-NEXT: shl v3.4s, v3.4s, #31
; AARCH64-NEXT: shl v2.4s, v2.4s, #31
; AARCH64-NEXT: cmlt v3.4s, v3.4s, #0
; AARCH64-NEXT: cmlt v2.4s, v2.4s, #0
; AARCH64-NEXT: bif v0.16b, v4.16b, v3.16b
; AARCH64-NEXT: bif v1.16b, v5.16b, v2.16b
; AARCH64-NEXT: str q0, [x0]
; AARCH64-NEXT: str q1, [x0, #16]
; AARCH64-NEXT: ret
;
; SVE-LABEL: test_masked_store_volatile_store:
; SVE: // %bb.0:
; SVE-NEXT: zip1 v3.8b, v2.8b, v0.8b
; SVE-NEXT: zip2 v2.8b, v2.8b, v0.8b
; SVE-NEXT: ldp q4, q5, [x0]
; SVE-NEXT: ushll v3.4s, v3.4h, #0
; SVE-NEXT: ushll v2.4s, v2.4h, #0
; SVE-NEXT: shl v3.4s, v3.4s, #31
; SVE-NEXT: shl v2.4s, v2.4s, #31
; SVE-NEXT: cmlt v3.4s, v3.4s, #0
; SVE-NEXT: cmlt v2.4s, v2.4s, #0
; SVE-NEXT: bif v0.16b, v4.16b, v3.16b
; SVE-NEXT: bif v1.16b, v5.16b, v2.16b
; SVE-NEXT: str q0, [x0]
; SVE-NEXT: str q1, [x0, #16]
; SVE-NEXT: ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
  store volatile <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

declare void @use_vec(<8 x i32>)

define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %cmp) {
; AARCH64-LABEL: test_masked_store_intervening:
; AARCH64: // %bb.0:
; AARCH64-NEXT: sub sp, sp, #96
; AARCH64-NEXT: str d8, [sp, #64] // 8-byte Folded Spill
; AARCH64-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; AARCH64-NEXT: .cfi_def_cfa_offset 96
; AARCH64-NEXT: .cfi_offset w19, -8
; AARCH64-NEXT: .cfi_offset w30, -16
; AARCH64-NEXT: .cfi_offset b8, -32
; AARCH64-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
; AARCH64-NEXT: ldp q1, q3, [x0]
; AARCH64-NEXT: movi v0.2d, #0000000000000000
; AARCH64-NEXT: fmov d8, d2
; AARCH64-NEXT: mov x19, x0
; AARCH64-NEXT: stp q1, q3, [sp] // 32-byte Folded Spill
; AARCH64-NEXT: movi v1.2d, #0000000000000000
; AARCH64-NEXT: stp q0, q0, [x0]
; AARCH64-NEXT: bl use_vec
; AARCH64-NEXT: zip2 v0.8b, v8.8b, v0.8b
; AARCH64-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload
; AARCH64-NEXT: zip1 v1.8b, v8.8b, v0.8b
; AARCH64-NEXT: ushll v0.4s, v0.4h, #0
; AARCH64-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload
; AARCH64-NEXT: shl v0.4s, v0.4s, #31
; AARCH64-NEXT: ushll v1.4s, v1.4h, #0
; AARCH64-NEXT: cmlt v0.4s, v0.4s, #0
; AARCH64-NEXT: shl v1.4s, v1.4s, #31
; AARCH64-NEXT: bsl v0.16b, v2.16b, v3.16b
; AARCH64-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
; AARCH64-NEXT: ldr q3, [sp] // 16-byte Folded Reload
; AARCH64-NEXT: cmlt v1.4s, v1.4s, #0
; AARCH64-NEXT: bsl v1.16b, v2.16b, v3.16b
; AARCH64-NEXT: stp q1, q0, [x19]
; AARCH64-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; AARCH64-NEXT: add sp, sp, #96
; AARCH64-NEXT: ret
;
; SVE-LABEL: test_masked_store_intervening:
; SVE: // %bb.0:
; SVE-NEXT: sub sp, sp, #96
; SVE-NEXT: str d8, [sp, #64] // 8-byte Folded Spill
; SVE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; SVE-NEXT: .cfi_def_cfa_offset 96
; SVE-NEXT: .cfi_offset w19, -8
; SVE-NEXT: .cfi_offset w30, -16
; SVE-NEXT: .cfi_offset b8, -32
; SVE-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
; SVE-NEXT: ldp q1, q3, [x0]
; SVE-NEXT: movi v0.2d, #0000000000000000
; SVE-NEXT: fmov d8, d2
; SVE-NEXT: mov x19, x0
; SVE-NEXT: stp q1, q3, [sp] // 32-byte Folded Spill
; SVE-NEXT: movi v1.2d, #0000000000000000
; SVE-NEXT: stp q0, q0, [x0]
; SVE-NEXT: bl use_vec
; SVE-NEXT: zip2 v0.8b, v8.8b, v0.8b
; SVE-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload
; SVE-NEXT: zip1 v1.8b, v8.8b, v0.8b
; SVE-NEXT: ushll v0.4s, v0.4h, #0
; SVE-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload
; SVE-NEXT: shl v0.4s, v0.4s, #31
; SVE-NEXT: ushll v1.4s, v1.4h, #0
; SVE-NEXT: cmlt v0.4s, v0.4s, #0
; SVE-NEXT: shl v1.4s, v1.4s, #31
; SVE-NEXT: bsl v0.16b, v2.16b, v3.16b
; SVE-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
; SVE-NEXT: ldr q3, [sp] // 16-byte Folded Reload
; SVE-NEXT: cmlt v1.4s, v1.4s, #0
; SVE-NEXT: bsl v1.16b, v2.16b, v3.16b
; SVE-NEXT: stp q1, q0, [x19]
; SVE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; SVE-NEXT: add sp, sp, #96
; SVE-NEXT: ret
  %load = load <8 x i32>, ptr %ptr, align 32
  store <8 x i32> zeroinitializer, ptr %ptr, align 32
  %tmp = load <8 x i32>, ptr %ptr
  call void @use_vec(<8 x i32> %tmp)
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}


define void @test_masked_store_multiple(<8 x i32> %x, <8 x i32> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %cmp, <8 x i1> %cmp2) {
; AARCH64-LABEL: test_masked_store_multiple:
; AARCH64: // %bb.0:
; AARCH64-NEXT: zip1 v6.8b, v4.8b, v0.8b
; AARCH64-NEXT: zip2 v4.8b, v4.8b, v0.8b
; AARCH64-NEXT: zip1 v7.8b, v5.8b, v0.8b
; AARCH64-NEXT: zip2 v5.8b, v5.8b, v0.8b
; AARCH64-NEXT: ldp q16, q17, [x0]
; AARCH64-NEXT: ushll v6.4s, v6.4h, #0
; AARCH64-NEXT: ushll v4.4s, v4.4h, #0
; AARCH64-NEXT: ushll v7.4s, v7.4h, #0
; AARCH64-NEXT: ushll v5.4s, v5.4h, #0
; AARCH64-NEXT: shl v6.4s, v6.4s, #31
; AARCH64-NEXT: shl v4.4s, v4.4s, #31
; AARCH64-NEXT: shl v7.4s, v7.4s, #31
; AARCH64-NEXT: shl v5.4s, v5.4s, #31
; AARCH64-NEXT: cmlt v6.4s, v6.4s, #0
; AARCH64-NEXT: cmlt v4.4s, v4.4s, #0
; AARCH64-NEXT: cmlt v7.4s, v7.4s, #0
; AARCH64-NEXT: cmlt v5.4s, v5.4s, #0
; AARCH64-NEXT: bif v0.16b, v16.16b, v6.16b
; AARCH64-NEXT: ldp q6, q16, [x1]
; AARCH64-NEXT: bif v1.16b, v17.16b, v4.16b
; AARCH64-NEXT: bif v2.16b, v6.16b, v7.16b
; AARCH64-NEXT: bif v3.16b, v16.16b, v5.16b
; AARCH64-NEXT: stp q0, q1, [x0]
; AARCH64-NEXT: stp q2, q3, [x1]
; AARCH64-NEXT: ret
;
; SVE-LABEL: test_masked_store_multiple:
; SVE: // %bb.0:
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; SVE-NEXT: zip2 v6.8b, v4.8b, v0.8b
; SVE-NEXT: zip1 v4.8b, v4.8b, v0.8b
; SVE-NEXT: mov x8, #4 // =0x4
; SVE-NEXT: zip2 v7.8b, v5.8b, v0.8b
; SVE-NEXT: zip1 v5.8b, v5.8b, v0.8b
; SVE-NEXT: // kill: def $q3 killed $q3 def $z3
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; SVE-NEXT: ptrue p0.s, vl4
; SVE-NEXT: ushll v6.4s, v6.4h, #0
; SVE-NEXT: ushll v4.4s, v4.4h, #0
; SVE-NEXT: ushll v7.4s, v7.4h, #0
; SVE-NEXT: ushll v5.4s, v5.4h, #0
; SVE-NEXT: shl v6.4s, v6.4s, #31
; SVE-NEXT: shl v4.4s, v4.4s, #31
; SVE-NEXT: shl v7.4s, v7.4s, #31
; SVE-NEXT: shl v5.4s, v5.4s, #31
; SVE-NEXT: cmlt v6.4s, v6.4s, #0
; SVE-NEXT: cmlt v4.4s, v4.4s, #0
; SVE-NEXT: cmlt v7.4s, v7.4s, #0
; SVE-NEXT: cmlt v5.4s, v5.4s, #0
; SVE-NEXT: cmpne p1.s, p0/z, z6.s, #0
; SVE-NEXT: ldr q6, [x1]
; SVE-NEXT: cmpne p2.s, p0/z, z4.s, #0
; SVE-NEXT: cmpne p0.s, p0/z, z7.s, #0
; SVE-NEXT: bif v2.16b, v6.16b, v5.16b
; SVE-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2]
; SVE-NEXT: st1w { z0.s }, p2, [x0]
; SVE-NEXT: st1w { z3.s }, p0, [x1, x8, lsl #2]
; SVE-NEXT: str q2, [x1]
; SVE-NEXT: ret
  %load = load <8 x i32>, ptr %ptr1, align 32
  %load2 = load <8 x i32>, ptr %ptr2, align 32
  %sel = select <8 x i1> %cmp, <8 x i32> %x, <8 x i32> %load
  %sel2 = select <8 x i1> %cmp2, <8 x i32> %y, <8 x i32> %load2
  store <8 x i32> %sel, ptr %ptr1, align 32
  store <8 x i32> %sel2, ptr %ptr2, align 32
  ret void
}
