Skip to content

Commit a6614ec

Browse files
committed
[SelectionDAG] Use unaligned store to move AVX registers onto stack for extractelement
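Prior to this patch, SelectionDAG generated an aligned store when moving an AVX register onto the stack for extractelement, even when the function was marked as a no-realign-stack function. This led to a mismatch between the actual alignment of the stack slot and the alignment assumed by the generated instruction. This patch fixes the issue by building the store's memory operand from the stack object's own alignment. Fixes #77730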
1 parent 7f1d757 commit a6614ec

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Analysis/ConstantFolding.h"
2222
#include "llvm/Analysis/TargetLibraryInfo.h"
2323
#include "llvm/CodeGen/ISDOpcodes.h"
24+
#include "llvm/CodeGen/MachineFrameInfo.h"
2425
#include "llvm/CodeGen/MachineFunction.h"
2526
#include "llvm/CodeGen/MachineJumpTableInfo.h"
2627
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -1377,6 +1378,21 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
13771378
}
13781379
}
13791380

1381+
// Helper function that generates a store MMO for the stack slot referenced by
1382+
// StackPtr, using the slot's alignment and size (~UINT64_C(0) if scalable).
1383+
static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
1384+
MachineFunction &MF,
1385+
bool isObjectScalable) {
1386+
auto &MFI = MF.getFrameInfo();
1387+
int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
1388+
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
1389+
uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
1390+
MachineMemOperand *MMO = MF.getMachineMemOperand(
1391+
PtrInfo, MachineMemOperand::MOStore, ObjectSize, MFI.getObjectAlign(FI));
1392+
1393+
return MMO;
1394+
}
1395+
13801396
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
13811397
SDValue Vec = Op.getOperand(0);
13821398
SDValue Idx = Op.getOperand(1);
@@ -1426,8 +1442,9 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
14261442
if (!Ch.getNode()) {
14271443
// Store the value to a temporary stack slot, then LOAD the returned part.
14281444
StackPtr = DAG.CreateStackTemporary(VecVT);
1429-
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
1430-
MachinePointerInfo());
1445+
MachineMemOperand *StoreMMO = getStackAlignedMMO(
1446+
StackPtr, DAG.getMachineFunction(), VecVT.isScalableVector());
1447+
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO);
14311448
}
14321449

14331450
SDValue NewLoad;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define i32 @foo(i32 %arg1) #0 {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
8+
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
9+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
10+
; CHECK-NEXT: andl $31, %edi
11+
; CHECK-NEXT: movzbl -40(%rsp,%rdi), %eax
12+
; CHECK-NEXT: vzeroupper
13+
; CHECK-NEXT: retq
14+
entry:
15+
%a = extractelement <32 x i8> zeroinitializer, i32 %arg1
16+
%b = zext i8 %a to i32
17+
ret i32 %b
18+
}
19+
20+
attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }

0 commit comments

Comments
 (0)