Skip to content
This repository was archived by the owner on Sep 2, 2018. It is now read-only.

Commit becd93f

Browse files
committed
[AArch64] Lower 2-CC FCCMPs (one/ueq) using AND'ed CCs.
The current behavior is incorrect, as the two CCs returned by
changeFPCCToAArch64CC, intended to be OR'ed, are instead used
in an AND ccmp chain.

Consider:

    define i32 @t(float %a, float %b, float %c, float %d, i32 %e, i32 %f) {
      %cc1 = fcmp one float %a, %b
      %cc2 = fcmp olt float %c, %d
      %and = and i1 %cc1, %cc2
      %r = select i1 %and, i32 %e, i32 %f
      ret i32 %r
    }

Assuming (%a < %b) and (%c < %d); we used to do:

    fcmp  s0, s1            # nzcv <- 1000
    orr   w8, wzr, #0x1     # w8 <- 1
    csel  w9, w8, wzr, mi   # w9 <- 1
    csel  w8, w8, w9, gt    # w8 <- 1
    fcmp  s2, s3            # nzcv <- 1000
    cset  w9, mi            # w9 <- 1
    tst   w8, w9            # (w8 & w9) == 1, so: nzcv <- 0000
    csel  w0, w0, w1, ne    # w0 <- w0

We now do:

    fcmp  s2, s3            # nzcv <- 1000
    fccmp s0, s1, #0, mi    # mi, so: nzcv <- 1000
    fccmp s0, s1, #8, le    # !le, so: nzcv <- 1000
    csel  w0, w0, w1, pl    # !pl, so: w0 <- w1

In other words, we transformed:

    (c < d) && ((a < b) || (a > b))

into:

    (c < d) && (a u>= b) && (a u<= b)

whereas, per De Morgan's, we wanted:

    (c < d) && !((a u>= b) && (a u<= b))

Note that this problem doesn't occur in the test-suite.

changeFPCCToAArch64CC produces disjunct CCs; here, one -> mi/gt.
We can't represent that in the fccmp chain; it can't express
arbitrary OR sequences, as one comment explains:

    In general we can create code for arbitrary "... (and (and A B) C)"
    sequences. We can also implement some "or" expressions, because
    "(or A B)" is equivalent to "not (and (not A) (not B))" and we can
    implement some negation operations. [...] However there is no way
    to negate the result of a partial sequence.
Instead, introduce changeFPCCToANDAArch64CC, which produces the
conjunct cond codes:

    - (a one b)
        == ((a olt b) || (a ogt b))
        == ((a ord b) && (a une b))
    - (a ueq b)
        == ((a uno b) || (a oeq b))
        == ((a ule b) && (a uge b))

Note that, at first, one might think that, when PushNegate is true,
we should use the disjunct CCs, in effect doing:

    (a || b)
    = !(!a && !(b))
    = !(!a && !(b1 || b2))   <- changeFPCCToAArch64CC(b, b1, b2)
    = !(!a && !b1 && !b2)

However, we can take advantage of the fact that the CC is already
negated, which lets us avoid special-casing PushNegate and doing
the simpler to reason about:

    (a || b)
    = !(!a && (!b))
    = !(!a && (b1 && b2))    <- changeFPCCToANDAArch64CC(!b, b1, b2)
    = !(!a && b1 && b2)

This makes both emitConditionalCompare cases behave identically,
and produces correct ccmp sequences for the 2-CC fcmps.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258533 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 5156091 commit becd93f

File tree

2 files changed

+196
-26
lines changed

2 files changed

+196
-26
lines changed

lib/Target/AArch64/AArch64ISelLowering.cpp

+36-8
Original file line numberDiff line numberDiff line change
@@ -1135,6 +1135,35 @@ static void changeFPCCToAArch64CC(ISD::CondCode CC,
11351135
}
11361136
}
11371137

1138+
/// Convert a DAG fp condition code to an AArch64 CC.
1139+
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1140+
/// should be AND'ed instead of OR'ed.
1141+
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1142+
AArch64CC::CondCode &CondCode,
1143+
AArch64CC::CondCode &CondCode2) {
1144+
CondCode2 = AArch64CC::AL;
1145+
switch (CC) {
1146+
default:
1147+
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1148+
assert(CondCode2 == AArch64CC::AL);
1149+
break;
1150+
case ISD::SETONE:
1151+
// (a one b)
1152+
// == ((a olt b) || (a ogt b))
1153+
// == ((a ord b) && (a une b))
1154+
CondCode = AArch64CC::VC;
1155+
CondCode2 = AArch64CC::NE;
1156+
break;
1157+
case ISD::SETUEQ:
1158+
// (a ueq b)
1159+
// == ((a uno b) || (a oeq b))
1160+
// == ((a ule b) && (a uge b))
1161+
CondCode = AArch64CC::PL;
1162+
CondCode2 = AArch64CC::LE;
1163+
break;
1164+
}
1165+
}
1166+
11381167
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
11391168
/// CC usable with the vector instructions. Fewer operations are available
11401169
/// without a real NZCV register, so we have to use less efficient combinations
@@ -1344,24 +1373,23 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
13441373
} else {
13451374
assert(LHS.getValueType().isFloatingPoint());
13461375
AArch64CC::CondCode ExtraCC;
1347-
changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
1348-
// Surpisingly some floating point conditions can't be tested with a
1349-
// single condition code. Construct an additional comparison in this case.
1350-
// See comment below on how we deal with OR conditions.
1376+
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1377+
// Some floating point conditions can't be tested with a single condition
1378+
// code. Construct an additional comparison in this case.
13511379
if (ExtraCC != AArch64CC::AL) {
13521380
SDValue ExtraCmp;
13531381
if (!CCOp.getNode())
13541382
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
13551383
else {
13561384
SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
1357-
// Note that we want the inverse of ExtraCC, so NZCV is not inversed.
1358-
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
1385+
AArch64CC::CondCode InvExtraCC =
1386+
AArch64CC::getInvertedCondCode(ExtraCC);
1387+
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvExtraCC);
13591388
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
13601389
NZCV, DL, DAG);
13611390
}
13621391
CCOp = ExtraCmp;
1363-
Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
1364-
OutCC = AArch64CC::getInvertedCondCode(OutCC);
1392+
Predicate = ExtraCC;
13651393
}
13661394
}
13671395

test/CodeGen/AArch64/arm64-ccmp.ll

+160-18
Original file line numberDiff line numberDiff line change
@@ -317,24 +317,6 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
317317
ret i64 %sel
318318
}
319319

320-
; CHECK-LABEL: select_complicated
321-
define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) {
322-
; CHECK: ldr [[REG:d[0-9]+]],
323-
; CHECK: fcmp d0, d2
324-
; CHECK-NEXT: fmov d2, #13.00000000
325-
; CHECK-NEXT: fccmp d1, d2, #4, ne
326-
; CHECK-NEXT: fccmp d0, d1, #1, ne
327-
; CHECK-NEXT: fccmp d0, d1, #4, vc
328-
; CEHCK-NEXT: csel w0, w0, w1, eq
329-
%1 = fcmp one double %v1, %v2
330-
%2 = fcmp oeq double %v2, 13.0
331-
%3 = fcmp oeq double %v1, 42.0
332-
%or0 = or i1 %2, %3
333-
%or1 = or i1 %1, %or0
334-
%sel = select i1 %or1, i16 %a, i16 %b
335-
ret i16 %sel
336-
}
337-
338320
; CHECK-LABEL: gccbug
339321
define i64 @gccbug(i64 %x0, i64 %x1) {
340322
; CHECK: cmp x0, #2
@@ -443,3 +425,163 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
443425
store volatile i32 %ext, i32* @g
444426
ret i64 %sel
445427
}
428+
429+
; Test the IR CCs that expand to two cond codes.
430+
431+
; CHECK-LABEL: _select_and_olt_one:
432+
; CHECK-LABEL: ; BB#0:
433+
; CHECK-NEXT: fcmp d0, d1
434+
; CHECK-NEXT: fccmp d2, d3, #4, mi
435+
; CHECK-NEXT: fccmp d2, d3, #1, ne
436+
; CHECK-NEXT: csel w0, w0, w1, vc
437+
; CHECK-NEXT: ret
438+
define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
439+
%c0 = fcmp olt double %v0, %v1
440+
%c1 = fcmp one double %v2, %v3
441+
%cr = and i1 %c1, %c0
442+
%sel = select i1 %cr, i32 %a, i32 %b
443+
ret i32 %sel
444+
}
445+
446+
; CHECK-LABEL: _select_and_one_olt:
447+
; CHECK-LABEL: ; BB#0:
448+
; CHECK-NEXT: fcmp d0, d1
449+
; CHECK-NEXT: fccmp d0, d1, #1, ne
450+
; CHECK-NEXT: fccmp d2, d3, #0, vc
451+
; CHECK-NEXT: csel w0, w0, w1, mi
452+
; CHECK-NEXT: ret
453+
define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
454+
%c0 = fcmp one double %v0, %v1
455+
%c1 = fcmp olt double %v2, %v3
456+
%cr = and i1 %c1, %c0
457+
%sel = select i1 %cr, i32 %a, i32 %b
458+
ret i32 %sel
459+
}
460+
461+
; CHECK-LABEL: _select_and_olt_ueq:
462+
; CHECK-LABEL: ; BB#0:
463+
; CHECK-NEXT: fcmp d0, d1
464+
; CHECK-NEXT: fccmp d2, d3, #0, mi
465+
; CHECK-NEXT: fccmp d2, d3, #8, le
466+
; CHECK-NEXT: csel w0, w0, w1, pl
467+
; CHECK-NEXT: ret
468+
define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
469+
%c0 = fcmp olt double %v0, %v1
470+
%c1 = fcmp ueq double %v2, %v3
471+
%cr = and i1 %c1, %c0
472+
%sel = select i1 %cr, i32 %a, i32 %b
473+
ret i32 %sel
474+
}
475+
476+
; CHECK-LABEL: _select_and_ueq_olt:
477+
; CHECK-LABEL: ; BB#0:
478+
; CHECK-NEXT: fcmp d0, d1
479+
; CHECK-NEXT: fccmp d0, d1, #8, le
480+
; CHECK-NEXT: fccmp d2, d3, #0, pl
481+
; CHECK-NEXT: csel w0, w0, w1, mi
482+
; CHECK-NEXT: ret
483+
define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
484+
%c0 = fcmp ueq double %v0, %v1
485+
%c1 = fcmp olt double %v2, %v3
486+
%cr = and i1 %c1, %c0
487+
%sel = select i1 %cr, i32 %a, i32 %b
488+
ret i32 %sel
489+
}
490+
491+
; CHECK-LABEL: _select_or_olt_one:
492+
; CHECK-LABEL: ; BB#0:
493+
; CHECK-NEXT: fcmp d0, d1
494+
; CHECK-NEXT: fccmp d2, d3, #0, pl
495+
; CHECK-NEXT: fccmp d2, d3, #8, le
496+
; CHECK-NEXT: csel w0, w0, w1, mi
497+
; CHECK-NEXT: ret
498+
define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
499+
%c0 = fcmp olt double %v0, %v1
500+
%c1 = fcmp one double %v2, %v3
501+
%cr = or i1 %c1, %c0
502+
%sel = select i1 %cr, i32 %a, i32 %b
503+
ret i32 %sel
504+
}
505+
506+
; CHECK-LABEL: _select_or_one_olt:
507+
; CHECK-LABEL: ; BB#0:
508+
; CHECK-NEXT: fcmp d0, d1
509+
; CHECK-NEXT: fccmp d0, d1, #1, ne
510+
; CHECK-NEXT: fccmp d2, d3, #8, vs
511+
; CHECK-NEXT: csel w0, w0, w1, mi
512+
; CHECK-NEXT: ret
513+
define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
514+
%c0 = fcmp one double %v0, %v1
515+
%c1 = fcmp olt double %v2, %v3
516+
%cr = or i1 %c1, %c0
517+
%sel = select i1 %cr, i32 %a, i32 %b
518+
ret i32 %sel
519+
}
520+
521+
; CHECK-LABEL: _select_or_olt_ueq:
522+
; CHECK-LABEL: ; BB#0:
523+
; CHECK-NEXT: fcmp d0, d1
524+
; CHECK-NEXT: fccmp d2, d3, #4, pl
525+
; CHECK-NEXT: fccmp d2, d3, #1, ne
526+
; CHECK-NEXT: csel w0, w0, w1, vs
527+
; CHECK-NEXT: ret
528+
define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
529+
%c0 = fcmp olt double %v0, %v1
530+
%c1 = fcmp ueq double %v2, %v3
531+
%cr = or i1 %c1, %c0
532+
%sel = select i1 %cr, i32 %a, i32 %b
533+
ret i32 %sel
534+
}
535+
536+
; CHECK-LABEL: _select_or_ueq_olt:
537+
; CHECK-LABEL: ; BB#0:
538+
; CHECK-NEXT: fcmp d0, d1
539+
; CHECK-NEXT: fccmp d0, d1, #8, le
540+
; CHECK-NEXT: fccmp d2, d3, #8, mi
541+
; CHECK-NEXT: csel w0, w0, w1, mi
542+
; CHECK-NEXT: ret
543+
define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
544+
%c0 = fcmp ueq double %v0, %v1
545+
%c1 = fcmp olt double %v2, %v3
546+
%cr = or i1 %c1, %c0
547+
%sel = select i1 %cr, i32 %a, i32 %b
548+
ret i32 %sel
549+
}
550+
551+
; CHECK-LABEL: _select_or_olt_ogt_ueq:
552+
; CHECK-LABEL: ; BB#0:
553+
; CHECK-NEXT: fcmp d0, d1
554+
; CHECK-NEXT: fccmp d2, d3, #0, pl
555+
; CHECK-NEXT: fccmp d4, d5, #4, le
556+
; CHECK-NEXT: fccmp d4, d5, #1, ne
557+
; CHECK-NEXT: csel w0, w0, w1, vs
558+
; CHECK-NEXT: ret
559+
define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
560+
%c0 = fcmp olt double %v0, %v1
561+
%c1 = fcmp ogt double %v2, %v3
562+
%c2 = fcmp ueq double %v4, %v5
563+
%c3 = or i1 %c1, %c0
564+
%cr = or i1 %c2, %c3
565+
%sel = select i1 %cr, i32 %a, i32 %b
566+
ret i32 %sel
567+
}
568+
569+
; CHECK-LABEL: _select_or_olt_ueq_ogt:
570+
; CHECK-LABEL: ; BB#0:
571+
; CHECK-NEXT: fcmp d0, d1
572+
; CHECK-NEXT: fccmp d2, d3, #4, pl
573+
; CHECK-NEXT: fccmp d2, d3, #1, ne
574+
; CHECK-NEXT: fccmp d4, d5, #0, vc
575+
; CHECK-NEXT: csel w0, w0, w1, gt
576+
; CHECK-NEXT: ret
577+
define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
578+
%c0 = fcmp olt double %v0, %v1
579+
%c1 = fcmp ueq double %v2, %v3
580+
%c2 = fcmp ogt double %v4, %v5
581+
%c3 = or i1 %c1, %c0
582+
%cr = or i1 %c2, %c3
583+
%sel = select i1 %cr, i32 %a, i32 %b
584+
ret i32 %sel
585+
}
586+
587+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)