Skip to content

Commit 044a00d

Browse files
committed
[SelectionDAG] Use unaligned store to move AVX registers onto stack for extractelement
Prior to this patch, SelectionDAG generated an aligned move onto the stack for AVX registers when the function was marked as a no-realign-stack function. This led to a mismatch between the actual alignment of the stack slot and the alignment required by the generated instruction. This patch fixes the issue by creating the store with the stack object's actual alignment. Fixes llvm#77730
1 parent 7f1d757 commit 044a00d

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Analysis/ConstantFolding.h"
2222
#include "llvm/Analysis/TargetLibraryInfo.h"
2323
#include "llvm/CodeGen/ISDOpcodes.h"
24+
#include "llvm/CodeGen/MachineFrameInfo.h"
2425
#include "llvm/CodeGen/MachineFunction.h"
2526
#include "llvm/CodeGen/MachineJumpTableInfo.h"
2627
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -1425,9 +1426,16 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
14251426

14261427
if (!Ch.getNode()) {
14271428
// Store the value to a temporary stack slot, then LOAD the returned part.
1429+
auto &MF = DAG.getMachineFunction();
1430+
auto &MFI = MF.getFrameInfo();
14281431
StackPtr = DAG.CreateStackTemporary(VecVT);
1429-
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
1430-
MachinePointerInfo());
1432+
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
1433+
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
1434+
MachineMemOperand *StoreMMO =
1435+
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
1436+
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1437+
1438+
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, StoreMMO);
14311439
}
14321440

14331441
SDValue NewLoad;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
define i32 @foo(i32 %arg1) #0 {
5+
; CHECK-LABEL: foo:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
8+
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
9+
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
10+
; CHECK-NEXT: andl $31, %edi
11+
; CHECK-NEXT: movzbl -40(%rsp,%rdi), %eax
12+
; CHECK-NEXT: vzeroupper
13+
; CHECK-NEXT: retq
14+
entry:
15+
%a = extractelement <32 x i8> zeroinitializer, i32 %arg1
16+
%b = zext i8 %a to i32
17+
ret i32 %b
18+
}
19+
20+
attributes #0 = { "no-realign-stack" "target-cpu"="skylake-avx512" }

0 commit comments

Comments
 (0)