Skip to content

Commit 637aa61

Browse files
authored
[ARM] Fix VBICimm and VORRimm generation under Big endian. (#107813)
This is a smaller follow-on to #105519 that fixes VBICimm and VORRimm too. The logic behind lowering vector immediates under big-endian Neon/MVE is to treat them in natural lane ordering (the same as little-endian) and to VECTOR_REG_CAST them to the correct type, as opposed to creating the constants in big-endian form and bitcasting them. This change makes sure the same approach is used when creating VORRIMM and VBICIMM nodes: the casts of the input to, and of the result from, these nodes now use ARMISD::VECTOR_REG_CAST instead of ISD::BITCAST.
1 parent 3356208 commit 637aa61

File tree

3 files changed

+12
-16
lines changed

3 files changed

+12
-16
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14443,9 +14443,9 @@ static SDValue PerformANDCombine(SDNode *N,
1444314443
DAG, dl, VbicVT, VT, OtherModImm);
1444414444
if (Val.getNode()) {
1444514445
SDValue Input =
14446-
DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14446+
DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
1444714447
SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14448-
return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14448+
return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
1444914449
}
1445014450
}
1445114451
}
@@ -14739,9 +14739,9 @@ static SDValue PerformORCombine(SDNode *N,
1473914739
SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
1474014740
if (Val.getNode()) {
1474114741
SDValue Input =
14742-
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14742+
DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
1474314743
SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14744-
return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14744+
return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
1474514745
}
1474614746
}
1474714747
}

llvm/test/CodeGen/ARM/big-endian-vmov.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,6 @@ define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() {
219219
ret <8 x i16> <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
220220
}
221221

222-
; FIXME: This is incorrect for BE
223222
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
224223
; CHECK-LE-LABEL: and_v8i16_m1:
225224
; CHECK-LE: @ %bb.0:
@@ -228,15 +227,14 @@ define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
228227
;
229228
; CHECK-BE-LABEL: and_v8i16_m1:
230229
; CHECK-BE: @ %bb.0:
231-
; CHECK-BE-NEXT: vrev64.32 q8, q0
230+
; CHECK-BE-NEXT: vrev64.16 q8, q0
232231
; CHECK-BE-NEXT: vbic.i32 q8, #0x10000
233-
; CHECK-BE-NEXT: vrev64.32 q0, q8
232+
; CHECK-BE-NEXT: vrev64.16 q0, q8
234233
; CHECK-BE-NEXT: bx lr
235234
%b = and <8 x i16> %a, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
236235
ret <8 x i16> %b
237236
}
238237

239-
; FIXME: This is incorrect for BE
240238
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
241239
; CHECK-LE-LABEL: or_v8i16_1:
242240
; CHECK-LE: @ %bb.0:
@@ -245,9 +243,9 @@ define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
245243
;
246244
; CHECK-BE-LABEL: or_v8i16_1:
247245
; CHECK-BE: @ %bb.0:
248-
; CHECK-BE-NEXT: vrev64.32 q8, q0
246+
; CHECK-BE-NEXT: vrev64.16 q8, q0
249247
; CHECK-BE-NEXT: vorr.i32 q8, #0x10000
250-
; CHECK-BE-NEXT: vrev64.32 q0, q8
248+
; CHECK-BE-NEXT: vrev64.16 q0, q8
251249
; CHECK-BE-NEXT: bx lr
252250
%b = or <8 x i16> %a, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
253251
ret <8 x i16> %b

llvm/test/CodeGen/Thumb2/mve-vmovimm.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,7 +1331,6 @@ entry:
13311331
ret <2 x i64> %s
13321332
}
13331333

1334-
; FIXME: This is incorrect for BE
13351334
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
13361335
; CHECKLE-LABEL: and_v8i16_m1:
13371336
; CHECKLE: @ %bb.0:
@@ -1340,15 +1339,14 @@ define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
13401339
;
13411340
; CHECKBE-LABEL: and_v8i16_m1:
13421341
; CHECKBE: @ %bb.0:
1343-
; CHECKBE-NEXT: vrev64.32 q1, q0
1342+
; CHECKBE-NEXT: vrev64.16 q1, q0
13441343
; CHECKBE-NEXT: vbic.i32 q1, #0x10000
1345-
; CHECKBE-NEXT: vrev64.32 q0, q1
1344+
; CHECKBE-NEXT: vrev64.16 q0, q1
13461345
; CHECKBE-NEXT: bx lr
13471346
%b = and <8 x i16> %a, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
13481347
ret <8 x i16> %b
13491348
}
13501349

1351-
; FIXME: This is incorrect for BE
13521350
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
13531351
; CHECKLE-LABEL: or_v8i16_1:
13541352
; CHECKLE: @ %bb.0:
@@ -1357,9 +1355,9 @@ define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
13571355
;
13581356
; CHECKBE-LABEL: or_v8i16_1:
13591357
; CHECKBE: @ %bb.0:
1360-
; CHECKBE-NEXT: vrev64.32 q1, q0
1358+
; CHECKBE-NEXT: vrev64.16 q1, q0
13611359
; CHECKBE-NEXT: vorr.i32 q1, #0x10000
1362-
; CHECKBE-NEXT: vrev64.32 q0, q1
1360+
; CHECKBE-NEXT: vrev64.16 q0, q1
13631361
; CHECKBE-NEXT: bx lr
13641362
%b = or <8 x i16> %a, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
13651363
ret <8 x i16> %b

0 commit comments

Comments
 (0)