Skip to content

Commit fc15752

Browse files
authored
[LICM] Prevent fold and hoist of binary ops with over 2 uses (#102114)
This limits folding and hoisting associative binary ops to cases where the intermediate op has at most two uses. The more uses the intermediate op has, the more new ops we have to create to potentially reduce the loop's critical path. We keep the limit to two uses to minimise undesirable increases in code size.
1 parent 9c51e51 commit fc15752

File tree

3 files changed

+185
-160
lines changed

3 files changed

+185
-160
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2806,7 +2806,8 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
28062806
return false;
28072807

28082808
auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
2809-
if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative())
2809+
if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative() ||
2810+
BO0->hasNUsesOrMore(3))
28102811
return false;
28112812

28122813
// Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"

llvm/test/CodeGen/PowerPC/common-chain.ll

Lines changed: 154 additions & 159 deletions
Original file line number | Diff line number | Diff line change
@@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
743743
; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill
744744
; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
745745
; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
746-
; CHECK-NEXT: std r4, -160(r1) # 8-byte Folded Spill
746+
; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill
747747
; CHECK-NEXT: ble cr0, .LBB7_7
748748
; CHECK-NEXT: # %bb.1: # %for.body.preheader
749-
; CHECK-NEXT: sldi r4, r6, 2
750-
; CHECK-NEXT: li r6, 1
751-
; CHECK-NEXT: mr r0, r10
752-
; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill
753-
; CHECK-NEXT: cmpdi r4, 1
754-
; CHECK-NEXT: iselgt r4, r4, r6
755-
; CHECK-NEXT: addi r7, r4, -1
756-
; CHECK-NEXT: clrldi r6, r4, 63
757-
; CHECK-NEXT: cmpldi r7, 3
749+
; CHECK-NEXT: sldi r6, r6, 2
750+
; CHECK-NEXT: li r7, 1
751+
; CHECK-NEXT: mr r30, r10
752+
; CHECK-NEXT: cmpdi r6, 1
753+
; CHECK-NEXT: iselgt r7, r6, r7
754+
; CHECK-NEXT: addi r8, r7, -1
755+
; CHECK-NEXT: clrldi r6, r7, 63
756+
; CHECK-NEXT: cmpldi r8, 3
758757
; CHECK-NEXT: blt cr0, .LBB7_4
759758
; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
760-
; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload
761-
; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload
762-
; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload
763-
; CHECK-NEXT: rldicl r7, r4, 62, 2
764-
; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload
765-
; CHECK-NEXT: add r11, r0, r30
766-
; CHECK-NEXT: add r4, r0, r0
767-
; CHECK-NEXT: mulli r23, r0, 24
768-
; CHECK-NEXT: add r14, r0, r8
769-
; CHECK-NEXT: sldi r12, r0, 5
770-
; CHECK-NEXT: add r31, r0, r9
771-
; CHECK-NEXT: sldi r9, r9, 3
772-
; CHECK-NEXT: sldi r18, r0, 4
773-
; CHECK-NEXT: sldi r8, r8, 3
774-
; CHECK-NEXT: add r10, r4, r4
775-
; CHECK-NEXT: sldi r4, r30, 3
776-
; CHECK-NEXT: sldi r11, r11, 3
777-
; CHECK-NEXT: add r26, r12, r9
778-
; CHECK-NEXT: add r16, r18, r9
779-
; CHECK-NEXT: add r29, r12, r8
780-
; CHECK-NEXT: add r19, r18, r8
781-
; CHECK-NEXT: add r30, r12, r4
782-
; CHECK-NEXT: mr r20, r4
783-
; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill
784-
; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload
785-
; CHECK-NEXT: add r15, r5, r11
786-
; CHECK-NEXT: sldi r11, r14, 3
787-
; CHECK-NEXT: add r29, r5, r29
788-
; CHECK-NEXT: add r28, r3, r26
789-
; CHECK-NEXT: add r19, r5, r19
790-
; CHECK-NEXT: add r21, r23, r9
791-
; CHECK-NEXT: add r24, r23, r8
792-
; CHECK-NEXT: add r14, r5, r11
793-
; CHECK-NEXT: sldi r11, r31, 3
794-
; CHECK-NEXT: add r25, r23, r20
795-
; CHECK-NEXT: add r20, r18, r20
796-
; CHECK-NEXT: add r30, r5, r30
797-
; CHECK-NEXT: add r18, r3, r16
798-
; CHECK-NEXT: add r24, r5, r24
799-
; CHECK-NEXT: add r23, r3, r21
800-
; CHECK-NEXT: add r27, r4, r26
801-
; CHECK-NEXT: add r22, r4, r21
802-
; CHECK-NEXT: add r17, r4, r16
803-
; CHECK-NEXT: add r2, r4, r11
804-
; CHECK-NEXT: rldicl r4, r7, 2, 1
805-
; CHECK-NEXT: sub r7, r8, r9
806-
; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload
759+
; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload
760+
; CHECK-NEXT: mulli r24, r30, 24
761+
; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload
762+
; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload
763+
; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload
764+
; CHECK-NEXT: rldicl r0, r7, 62, 2
765+
; CHECK-NEXT: sldi r11, r30, 5
766+
; CHECK-NEXT: sldi r19, r30, 4
767+
; CHECK-NEXT: sldi r7, r14, 3
768+
; CHECK-NEXT: add r14, r30, r14
769+
; CHECK-NEXT: sldi r10, r16, 3
770+
; CHECK-NEXT: sldi r12, r15, 3
771+
; CHECK-NEXT: add r16, r30, r16
772+
; CHECK-NEXT: add r15, r30, r15
773+
; CHECK-NEXT: add r27, r11, r7
774+
; CHECK-NEXT: add r22, r24, r7
775+
; CHECK-NEXT: add r17, r19, r7
776+
; CHECK-NEXT: sldi r2, r14, 3
777+
; CHECK-NEXT: add r26, r24, r10
778+
; CHECK-NEXT: add r25, r24, r12
779+
; CHECK-NEXT: add r21, r19, r10
780+
; CHECK-NEXT: add r20, r19, r12
781+
; CHECK-NEXT: add r8, r11, r10
782+
; CHECK-NEXT: sldi r16, r16, 3
783+
; CHECK-NEXT: add r29, r5, r27
784+
; CHECK-NEXT: add r28, r4, r27
785+
; CHECK-NEXT: add r27, r3, r27
786+
; CHECK-NEXT: add r24, r5, r22
787+
; CHECK-NEXT: add r23, r4, r22
788+
; CHECK-NEXT: add r22, r3, r22
789+
; CHECK-NEXT: add r19, r5, r17
790+
; CHECK-NEXT: add r18, r4, r17
791+
; CHECK-NEXT: add r17, r3, r17
792+
; CHECK-NEXT: add r14, r5, r2
793+
; CHECK-NEXT: add r31, r4, r2
794+
; CHECK-NEXT: add r2, r3, r2
795+
; CHECK-NEXT: add r9, r5, r8
796+
; CHECK-NEXT: add r8, r11, r12
807797
; CHECK-NEXT: add r26, r5, r26
808798
; CHECK-NEXT: add r25, r5, r25
809799
; CHECK-NEXT: add r21, r5, r21
810800
; CHECK-NEXT: add r20, r5, r20
811801
; CHECK-NEXT: add r16, r5, r16
812-
; CHECK-NEXT: add r31, r5, r11
813-
; CHECK-NEXT: add r11, r3, r11
814-
; CHECK-NEXT: addi r4, r4, -4
815-
; CHECK-NEXT: rldicl r4, r4, 62, 2
816-
; CHECK-NEXT: sub r8, r8, r9
817-
; CHECK-NEXT: li r9, 0
818-
; CHECK-NEXT: addi r4, r4, 1
819-
; CHECK-NEXT: mtctr r4
802+
; CHECK-NEXT: add r8, r5, r8
803+
; CHECK-NEXT: rldicl r3, r0, 2, 1
804+
; CHECK-NEXT: addi r3, r3, -4
805+
; CHECK-NEXT: sub r0, r12, r7
806+
; CHECK-NEXT: sub r12, r10, r7
807+
; CHECK-NEXT: li r7, 0
808+
; CHECK-NEXT: mr r10, r30
809+
; CHECK-NEXT: sldi r15, r15, 3
810+
; CHECK-NEXT: add r15, r5, r15
811+
; CHECK-NEXT: rldicl r3, r3, 62, 2
812+
; CHECK-NEXT: addi r3, r3, 1
813+
; CHECK-NEXT: mtctr r3
820814
; CHECK-NEXT: .p2align 4
821815
; CHECK-NEXT: .LBB7_3: # %for.body
822816
; CHECK-NEXT: #
823-
; CHECK-NEXT: lfd f0, 0(r11)
824-
; CHECK-NEXT: lfd f1, 0(r2)
825-
; CHECK-NEXT: add r0, r0, r10
826-
; CHECK-NEXT: xsmuldp f0, f0, f1
817+
; CHECK-NEXT: lfd f0, 0(r2)
827818
; CHECK-NEXT: lfd f1, 0(r31)
819+
; CHECK-NEXT: add r3, r10, r30
820+
; CHECK-NEXT: add r3, r3, r30
821+
; CHECK-NEXT: xsmuldp f0, f0, f1
822+
; CHECK-NEXT: lfd f1, 0(r14)
823+
; CHECK-NEXT: add r3, r3, r30
824+
; CHECK-NEXT: add r10, r3, r30
828825
; CHECK-NEXT: xsadddp f0, f1, f0
829-
; CHECK-NEXT: stfd f0, 0(r31)
830-
; CHECK-NEXT: add r31, r31, r12
831-
; CHECK-NEXT: lfdx f0, r11, r7
832-
; CHECK-NEXT: lfdx f1, r2, r7
826+
; CHECK-NEXT: stfd f0, 0(r14)
827+
; CHECK-NEXT: add r14, r14, r11
828+
; CHECK-NEXT: lfdx f0, r2, r0
829+
; CHECK-NEXT: lfdx f1, r31, r0
833830
; CHECK-NEXT: xsmuldp f0, f0, f1
834-
; CHECK-NEXT: lfdx f1, r14, r9
831+
; CHECK-NEXT: lfdx f1, r15, r7
835832
; CHECK-NEXT: xsadddp f0, f1, f0
836-
; CHECK-NEXT: stfdx f0, r14, r9
837-
; CHECK-NEXT: lfdx f0, r11, r8
838-
; CHECK-NEXT: lfdx f1, r2, r8
839-
; CHECK-NEXT: add r11, r11, r12
840-
; CHECK-NEXT: add r2, r2, r12
833+
; CHECK-NEXT: stfdx f0, r15, r7
834+
; CHECK-NEXT: lfdx f0, r2, r12
835+
; CHECK-NEXT: lfdx f1, r31, r12
836+
; CHECK-NEXT: add r2, r2, r11
837+
; CHECK-NEXT: add r31, r31, r11
841838
; CHECK-NEXT: xsmuldp f0, f0, f1
842-
; CHECK-NEXT: lfdx f1, r15, r9
839+
; CHECK-NEXT: lfdx f1, r16, r7
843840
; CHECK-NEXT: xsadddp f0, f1, f0
844-
; CHECK-NEXT: stfdx f0, r15, r9
845-
; CHECK-NEXT: lfd f0, 0(r18)
846-
; CHECK-NEXT: lfd f1, 0(r17)
841+
; CHECK-NEXT: stfdx f0, r16, r7
842+
; CHECK-NEXT: lfd f0, 0(r17)
843+
; CHECK-NEXT: lfd f1, 0(r18)
847844
; CHECK-NEXT: xsmuldp f0, f0, f1
848-
; CHECK-NEXT: lfdx f1, r16, r9
845+
; CHECK-NEXT: lfdx f1, r19, r7
849846
; CHECK-NEXT: xsadddp f0, f1, f0
850-
; CHECK-NEXT: stfdx f0, r16, r9
851-
; CHECK-NEXT: lfdx f0, r18, r7
852-
; CHECK-NEXT: lfdx f1, r17, r7
847+
; CHECK-NEXT: stfdx f0, r19, r7
848+
; CHECK-NEXT: lfdx f0, r17, r0
849+
; CHECK-NEXT: lfdx f1, r18, r0
853850
; CHECK-NEXT: xsmuldp f0, f0, f1
854-
; CHECK-NEXT: lfdx f1, r19, r9
851+
; CHECK-NEXT: lfdx f1, r20, r7
855852
; CHECK-NEXT: xsadddp f0, f1, f0
856-
; CHECK-NEXT: stfdx f0, r19, r9
857-
; CHECK-NEXT: lfdx f0, r18, r8
858-
; CHECK-NEXT: lfdx f1, r17, r8
859-
; CHECK-NEXT: add r18, r18, r12
860-
; CHECK-NEXT: add r17, r17, r12
853+
; CHECK-NEXT: stfdx f0, r20, r7
854+
; CHECK-NEXT: lfdx f0, r17, r12
855+
; CHECK-NEXT: lfdx f1, r18, r12
856+
; CHECK-NEXT: add r17, r17, r11
857+
; CHECK-NEXT: add r18, r18, r11
861858
; CHECK-NEXT: xsmuldp f0, f0, f1
862-
; CHECK-NEXT: lfdx f1, r20, r9
859+
; CHECK-NEXT: lfdx f1, r21, r7
863860
; CHECK-NEXT: xsadddp f0, f1, f0
864-
; CHECK-NEXT: stfdx f0, r20, r9
865-
; CHECK-NEXT: lfd f0, 0(r23)
866-
; CHECK-NEXT: lfd f1, 0(r22)
861+
; CHECK-NEXT: stfdx f0, r21, r7
862+
; CHECK-NEXT: lfd f0, 0(r22)
863+
; CHECK-NEXT: lfd f1, 0(r23)
867864
; CHECK-NEXT: xsmuldp f0, f0, f1
868-
; CHECK-NEXT: lfdx f1, r21, r9
865+
; CHECK-NEXT: lfdx f1, r24, r7
869866
; CHECK-NEXT: xsadddp f0, f1, f0
870-
; CHECK-NEXT: stfdx f0, r21, r9
871-
; CHECK-NEXT: lfdx f0, r23, r7
872-
; CHECK-NEXT: lfdx f1, r22, r7
867+
; CHECK-NEXT: stfdx f0, r24, r7
868+
; CHECK-NEXT: lfdx f0, r22, r0
869+
; CHECK-NEXT: lfdx f1, r23, r0
873870
; CHECK-NEXT: xsmuldp f0, f0, f1
874-
; CHECK-NEXT: lfdx f1, r24, r9
871+
; CHECK-NEXT: lfdx f1, r25, r7
875872
; CHECK-NEXT: xsadddp f0, f1, f0
876-
; CHECK-NEXT: stfdx f0, r24, r9
877-
; CHECK-NEXT: lfdx f0, r23, r8
878-
; CHECK-NEXT: lfdx f1, r22, r8
879-
; CHECK-NEXT: add r23, r23, r12
880-
; CHECK-NEXT: add r22, r22, r12
873+
; CHECK-NEXT: stfdx f0, r25, r7
874+
; CHECK-NEXT: lfdx f0, r22, r12
875+
; CHECK-NEXT: lfdx f1, r23, r12
876+
; CHECK-NEXT: add r22, r22, r11
877+
; CHECK-NEXT: add r23, r23, r11
881878
; CHECK-NEXT: xsmuldp f0, f0, f1
882-
; CHECK-NEXT: lfdx f1, r25, r9
879+
; CHECK-NEXT: lfdx f1, r26, r7
883880
; CHECK-NEXT: xsadddp f0, f1, f0
884-
; CHECK-NEXT: stfdx f0, r25, r9
885-
; CHECK-NEXT: lfd f0, 0(r28)
886-
; CHECK-NEXT: lfd f1, 0(r27)
881+
; CHECK-NEXT: stfdx f0, r26, r7
882+
; CHECK-NEXT: lfd f0, 0(r27)
883+
; CHECK-NEXT: lfd f1, 0(r28)
887884
; CHECK-NEXT: xsmuldp f0, f0, f1
888-
; CHECK-NEXT: lfdx f1, r26, r9
885+
; CHECK-NEXT: lfdx f1, r29, r7
889886
; CHECK-NEXT: xsadddp f0, f1, f0
890-
; CHECK-NEXT: stfdx f0, r26, r9
891-
; CHECK-NEXT: lfdx f0, r28, r7
892-
; CHECK-NEXT: lfdx f1, r27, r7
887+
; CHECK-NEXT: stfdx f0, r29, r7
888+
; CHECK-NEXT: lfdx f0, r27, r0
889+
; CHECK-NEXT: lfdx f1, r28, r0
893890
; CHECK-NEXT: xsmuldp f0, f0, f1
894-
; CHECK-NEXT: lfdx f1, r29, r9
891+
; CHECK-NEXT: lfdx f1, r8, r7
895892
; CHECK-NEXT: xsadddp f0, f1, f0
896-
; CHECK-NEXT: stfdx f0, r29, r9
897-
; CHECK-NEXT: lfdx f0, r28, r8
898-
; CHECK-NEXT: lfdx f1, r27, r8
899-
; CHECK-NEXT: add r28, r28, r12
900-
; CHECK-NEXT: add r27, r27, r12
893+
; CHECK-NEXT: stfdx f0, r8, r7
894+
; CHECK-NEXT: lfdx f0, r27, r12
895+
; CHECK-NEXT: lfdx f1, r28, r12
896+
; CHECK-NEXT: add r27, r27, r11
897+
; CHECK-NEXT: add r28, r28, r11
901898
; CHECK-NEXT: xsmuldp f0, f0, f1
902-
; CHECK-NEXT: lfdx f1, r30, r9
899+
; CHECK-NEXT: lfdx f1, r9, r7
903900
; CHECK-NEXT: xsadddp f0, f1, f0
904-
; CHECK-NEXT: stfdx f0, r30, r9
905-
; CHECK-NEXT: add r9, r9, r12
901+
; CHECK-NEXT: stfdx f0, r9, r7
902+
; CHECK-NEXT: add r7, r7, r11
906903
; CHECK-NEXT: bdnz .LBB7_3
907904
; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
908-
; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload
909905
; CHECK-NEXT: cmpldi r6, 0
910906
; CHECK-NEXT: beq cr0, .LBB7_7
911907
; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
912-
; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload
913-
; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload
914-
; CHECK-NEXT: mr r30, r3
915-
; CHECK-NEXT: sldi r7, r7, 3
916-
; CHECK-NEXT: add r4, r0, r4
917-
; CHECK-NEXT: sldi r4, r4, 3
918-
; CHECK-NEXT: add r3, r5, r4
919-
; CHECK-NEXT: add r8, r29, r4
920-
; CHECK-NEXT: add r9, r30, r4
921-
; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
922-
; CHECK-NEXT: add r4, r0, r4
923-
; CHECK-NEXT: sldi r4, r4, 3
924-
; CHECK-NEXT: add r10, r5, r4
925-
; CHECK-NEXT: add r11, r29, r4
926-
; CHECK-NEXT: add r12, r30, r4
927-
; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload
928-
; CHECK-NEXT: add r4, r0, r4
929-
; CHECK-NEXT: sldi r0, r4, 3
930-
; CHECK-NEXT: add r5, r5, r0
931-
; CHECK-NEXT: add r4, r29, r0
932-
; CHECK-NEXT: add r30, r30, r0
933-
; CHECK-NEXT: li r0, 0
908+
; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload
909+
; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
910+
; CHECK-NEXT: sldi r8, r30, 3
911+
; CHECK-NEXT: add r3, r10, r3
912+
; CHECK-NEXT: sldi r3, r3, 3
913+
; CHECK-NEXT: add r7, r5, r3
914+
; CHECK-NEXT: add r9, r4, r3
915+
; CHECK-NEXT: add r11, r0, r3
916+
; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
917+
; CHECK-NEXT: add r3, r10, r3
918+
; CHECK-NEXT: sldi r3, r3, 3
919+
; CHECK-NEXT: add r12, r5, r3
920+
; CHECK-NEXT: add r30, r4, r3
921+
; CHECK-NEXT: add r29, r0, r3
922+
; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload
923+
; CHECK-NEXT: add r3, r10, r3
924+
; CHECK-NEXT: li r10, 0
925+
; CHECK-NEXT: sldi r3, r3, 3
926+
; CHECK-NEXT: add r5, r5, r3
927+
; CHECK-NEXT: add r4, r4, r3
928+
; CHECK-NEXT: add r3, r0, r3
934929
; CHECK-NEXT: .p2align 4
935930
; CHECK-NEXT: .LBB7_6: # %for.body.epil
936931
; CHECK-NEXT: #
937-
; CHECK-NEXT: lfdx f0, r30, r0
938-
; CHECK-NEXT: lfdx f1, r4, r0
932+
; CHECK-NEXT: lfdx f0, r3, r10
933+
; CHECK-NEXT: lfdx f1, r4, r10
939934
; CHECK-NEXT: addi r6, r6, -1
940935
; CHECK-NEXT: cmpldi r6, 0
941936
; CHECK-NEXT: xsmuldp f0, f0, f1
942937
; CHECK-NEXT: lfd f1, 0(r5)
943938
; CHECK-NEXT: xsadddp f0, f1, f0
944939
; CHECK-NEXT: stfd f0, 0(r5)
945-
; CHECK-NEXT: add r5, r5, r7
946-
; CHECK-NEXT: lfdx f0, r12, r0
947-
; CHECK-NEXT: lfdx f1, r11, r0
940+
; CHECK-NEXT: add r5, r5, r8
941+
; CHECK-NEXT: lfdx f0, r29, r10
942+
; CHECK-NEXT: lfdx f1, r30, r10
948943
; CHECK-NEXT: xsmuldp f0, f0, f1
949-
; CHECK-NEXT: lfdx f1, r10, r0
944+
; CHECK-NEXT: lfdx f1, r12, r10
950945
; CHECK-NEXT: xsadddp f0, f1, f0
951-
; CHECK-NEXT: stfdx f0, r10, r0
952-
; CHECK-NEXT: lfdx f0, r9, r0
953-
; CHECK-NEXT: lfdx f1, r8, r0
946+
; CHECK-NEXT: stfdx f0, r12, r10
947+
; CHECK-NEXT: lfdx f0, r11, r10
948+
; CHECK-NEXT: lfdx f1, r9, r10
954949
; CHECK-NEXT: xsmuldp f0, f0, f1
955-
; CHECK-NEXT: lfdx f1, r3, r0
950+
; CHECK-NEXT: lfdx f1, r7, r10
956951
; CHECK-NEXT: xsadddp f0, f1, f0
957-
; CHECK-NEXT: stfdx f0, r3, r0
958-
; CHECK-NEXT: add r0, r0, r7
952+
; CHECK-NEXT: stfdx f0, r7, r10
953+
; CHECK-NEXT: add r10, r10, r8
959954
; CHECK-NEXT: bne cr0, .LBB7_6
960955
; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
961956
; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload

0 commit comments

Comments
 (0)