Skip to content

Commit 25bef68

Browse files
author
Christophe Lyon
committed
arm: Add movmisalign patterns for MVE (PR target/97875)
This patch adds new movmisalign<mode>_mve_load and store patterns for MVE to help vectorization. They are very similar to their Neon counterparts, but use different iterators and instructions. Indeed MVE supports fewer vector modes than Neon, so we use the MVE_VLD_ST iterator where Neon uses VQX. Since the supported modes are different from the ones valid for arithmetic operators, we introduce two new sets of macros: ARM_HAVE_NEON_<MODE>_LDST, true if Neon has vector load/store instructions for <MODE>; ARM_HAVE_<MODE>_LDST, true if any vector extension has vector load/store instructions for <MODE>. We move the movmisalign<mode> expander from neon.md to vec-common.md, and replace the TARGET_NEON enabler with ARM_HAVE_<MODE>_LDST. The patch also updates the mve-vneg.c test to scan for the better code generation when loading and storing the vectors involved: it checks that no 'orr' instruction is generated to cope with misalignment at runtime. This test was chosen among the other mve tests, but any other should be OK. Using a plain vector copy loop (dest[i] = a[i]) is not a good test because the compiler chooses to use memcpy. For instance we now generate: test_vneg_s32x4: vldrw.32 q3, [r1] vneg.s32 q3, q3 vstrw.32 q3, [r0] bx lr instead of: test_vneg_s32x4: orr r3, r1, r0 lsls r3, r3, #28 bne .L15 vldrw.32 q3, [r1] vneg.s32 q3, q3 vstrw.32 q3, [r0] bx lr .L15: push {r4, r5} ldrd r2, r3, [r1, #8] ldrd r5, r4, [r1] rsbs r2, r2, #0 rsbs r5, r5, #0 rsbs r4, r4, #0 rsbs r3, r3, #0 strd r5, r4, [r0] pop {r4, r5} strd r2, r3, [r0, #8] bx lr 2021-01-12 Christophe Lyon <[email protected]> PR target/97875 gcc/ * config/arm/arm.h (ARM_HAVE_NEON_V8QI_LDST): New macro. (ARM_HAVE_NEON_V16QI_LDST, ARM_HAVE_NEON_V4HI_LDST): Likewise. (ARM_HAVE_NEON_V8HI_LDST, ARM_HAVE_NEON_V2SI_LDST): Likewise. (ARM_HAVE_NEON_V4SI_LDST, ARM_HAVE_NEON_V4HF_LDST): Likewise. (ARM_HAVE_NEON_V8HF_LDST, ARM_HAVE_NEON_V4BF_LDST): Likewise.
(ARM_HAVE_NEON_V8BF_LDST, ARM_HAVE_NEON_V2SF_LDST): Likewise. (ARM_HAVE_NEON_V4SF_LDST, ARM_HAVE_NEON_DI_LDST): Likewise. (ARM_HAVE_NEON_V2DI_LDST): Likewise. (ARM_HAVE_V8QI_LDST, ARM_HAVE_V16QI_LDST): Likewise. (ARM_HAVE_V4HI_LDST, ARM_HAVE_V8HI_LDST): Likewise. (ARM_HAVE_V2SI_LDST, ARM_HAVE_V4SI_LDST, ARM_HAVE_V4HF_LDST): Likewise. (ARM_HAVE_V8HF_LDST, ARM_HAVE_V4BF_LDST, ARM_HAVE_V8BF_LDST): Likewise. (ARM_HAVE_V2SF_LDST, ARM_HAVE_V4SF_LDST, ARM_HAVE_DI_LDST): Likewise. (ARM_HAVE_V2DI_LDST): Likewise. * config/arm/mve.md (*movmisalign<mode>_mve_store): New pattern. (*movmisalign<mode>_mve_load): New pattern. * config/arm/neon.md (movmisalign<mode>): Move to ... * config/arm/vec-common.md: ... here. PR target/97875 gcc/testsuite/ * gcc.target/arm/simd/mve-vneg.c: Update test.
1 parent cf2ac1c commit 25bef68

File tree

5 files changed

+89
-25
lines changed

5 files changed

+89
-25
lines changed

gcc/config/arm/arm.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,46 @@ extern const int arm_arch_cde_coproc_bits[];
11511151
#define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT)
11521152
#define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT)
11531153

1154+
/* The conditions under which vector modes are supported by load/store
1155+
instructions using Neon.  ARM_HAVE_NEON_<MODE>_LDST is true if Neon has
vector load/store instructions for <MODE>.  These are kept separate from
the ARM_HAVE_NEON_<MODE>_ARITH conditions because the modes supported for
load/store differ from the ones valid for arithmetic operators.  */
1156+
1157+
#define ARM_HAVE_NEON_V8QI_LDST TARGET_NEON
1158+
#define ARM_HAVE_NEON_V16QI_LDST TARGET_NEON
1159+
#define ARM_HAVE_NEON_V4HI_LDST TARGET_NEON
1160+
#define ARM_HAVE_NEON_V8HI_LDST TARGET_NEON
1161+
#define ARM_HAVE_NEON_V2SI_LDST TARGET_NEON
1162+
#define ARM_HAVE_NEON_V4SI_LDST TARGET_NEON
1163+
#define ARM_HAVE_NEON_V4HF_LDST TARGET_NEON_FP16INST
1164+
#define ARM_HAVE_NEON_V8HF_LDST TARGET_NEON_FP16INST
1165+
#define ARM_HAVE_NEON_V4BF_LDST TARGET_BF16_SIMD
1166+
#define ARM_HAVE_NEON_V8BF_LDST TARGET_BF16_SIMD
1167+
#define ARM_HAVE_NEON_V2SF_LDST TARGET_NEON
1168+
#define ARM_HAVE_NEON_V4SF_LDST TARGET_NEON
1169+
#define ARM_HAVE_NEON_DI_LDST TARGET_NEON
1170+
#define ARM_HAVE_NEON_V2DI_LDST TARGET_NEON
1171+
1172+
/* The conditions under which vector modes are supported by load/store
1173+
instructions by any vector extension.  ARM_HAVE_<MODE>_LDST is true if any
vector extension has vector load/store instructions for <MODE>; each macro
is the Neon condition, optionally OR-ed with another extension's enabler.  */
1174+
1175+
/* Modes also supported when TARGET_REALLY_IWMMXT holds.  */
#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST || TARGET_REALLY_IWMMXT)
1176+
#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST || TARGET_REALLY_IWMMXT)
1177+
#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST || TARGET_REALLY_IWMMXT)
1178+
1179+
/* Modes also supported when TARGET_HAVE_MVE holds; DI and V2DI fall back to
   the Neon condition alone.  */
#define ARM_HAVE_V16QI_LDST (ARM_HAVE_NEON_V16QI_LDST || TARGET_HAVE_MVE)
1180+
#define ARM_HAVE_V8HI_LDST (ARM_HAVE_NEON_V8HI_LDST || TARGET_HAVE_MVE)
1181+
#define ARM_HAVE_V4SI_LDST (ARM_HAVE_NEON_V4SI_LDST || TARGET_HAVE_MVE)
1182+
#define ARM_HAVE_DI_LDST ARM_HAVE_NEON_DI_LDST
1183+
#define ARM_HAVE_V2DI_LDST ARM_HAVE_NEON_V2DI_LDST
1184+
1185+
/* V4HF and V2SF: the Neon condition alone.  */
#define ARM_HAVE_V4HF_LDST ARM_HAVE_NEON_V4HF_LDST
1186+
#define ARM_HAVE_V2SF_LDST ARM_HAVE_NEON_V2SF_LDST
1187+
1188+
/* V4BF and V8BF: the Neon condition alone.  */
#define ARM_HAVE_V4BF_LDST ARM_HAVE_NEON_V4BF_LDST
1189+
#define ARM_HAVE_V8BF_LDST ARM_HAVE_NEON_V8BF_LDST
1190+
1191+
/* Modes also supported when TARGET_HAVE_MVE_FLOAT holds.  */
#define ARM_HAVE_V8HF_LDST (ARM_HAVE_NEON_V8HF_LDST || TARGET_HAVE_MVE_FLOAT)
1192+
#define ARM_HAVE_V4SF_LDST (ARM_HAVE_NEON_V4SF_LDST || TARGET_HAVE_MVE_FLOAT)
1193+
11541194
/* The register numbers in sequence, for passing to arm_gen_load_multiple. */
11551195
extern int arm_regs_in_sequence[];
11561196

gcc/config/arm/mve.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10825,3 +10825,26 @@
1082510825
[(set_attr "type" "coproc")
1082610826
(set_attr "length" "8")]
1082710827
)
10828+
10829+
;; Misaligned vector store for MVE: stores register operand 1 to memory
;; operand 0, with the source wrapped in UNSPEC_MISALIGNED_ACCESS.
;; Instantiated for every mode of the MVE_VLD_ST iterator.
;; Enabled only on little-endian targets with unaligned_access, and only
;; when TARGET_HAVE_MVE holds for a VALID_MVE_SI_MODE mode or
;; TARGET_HAVE_MVE_FLOAT holds for a VALID_MVE_SF_MODE mode.
;; Emits a single vstr<V_sz_elem1>.<V_sz_elem> instruction.
(define_insn "*movmisalign<mode>_mve_store"
10830+
[(set (match_operand:MVE_VLD_ST 0 "neon_permissive_struct_operand" "=Um")
10831+
(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "s_register_operand" " w")]
10832+
UNSPEC_MISALIGNED_ACCESS))]
10833+
"((TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
10834+
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
10835+
&& !BYTES_BIG_ENDIAN && unaligned_access"
10836+
"vstr<V_sz_elem1>.<V_sz_elem>\t%q1, %E0"
10837+
[(set_attr "type" "mve_store")]
10838+
)
10839+
10840+
10841+
;; Misaligned vector load for MVE: loads memory operand 1 into register
;; operand 0, with the source wrapped in UNSPEC_MISALIGNED_ACCESS.
;; Instantiated for every mode of the MVE_VLD_ST iterator.
;; Enabled only on little-endian targets with unaligned_access, and only
;; when TARGET_HAVE_MVE holds for a VALID_MVE_SI_MODE mode or
;; TARGET_HAVE_MVE_FLOAT holds for a VALID_MVE_SF_MODE mode.
;; Emits a single vldr<V_sz_elem1>.<V_sz_elem> instruction.
(define_insn "*movmisalign<mode>_mve_load"
10842+
[(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
10843+
(unspec:MVE_VLD_ST [(match_operand:MVE_VLD_ST 1 "neon_permissive_struct_operand" " Um")]
10844+
UNSPEC_MISALIGNED_ACCESS))]
10845+
"((TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
10846+
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode)))
10847+
&& !BYTES_BIG_ENDIAN && unaligned_access"
10848+
"vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1"
10849+
[(set_attr "type" "mve_load")]
10850+
)

gcc/config/arm/neon.md

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -280,31 +280,6 @@
280280
neon_disambiguate_copy (operands, dest, src, 4);
281281
})
282282

283-
(define_expand "movmisalign<mode>"
284-
[(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
285-
(unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
286-
UNSPEC_MISALIGNED_ACCESS))]
287-
"TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
288-
{
289-
/* Expander for misaligned vector moves over the VDQX modes.  Enabled when
   TARGET_NEON holds, on little-endian targets with unaligned_access.  The
   preparation code below only massages the operands; the (set ... (unspec
   ...)) template above is what gets emitted.  */
rtx adjust_mem;
290-
/* This pattern is not permitted to fail during expansion: if both arguments
291-
are non-registers (e.g. memory := constant, which can be created by the
292-
auto-vectorizer), force operand 1 into a register. */
293-
if (!s_register_operand (operands[0], <MODE>mode)
294-
&& !s_register_operand (operands[1], <MODE>mode))
295-
operands[1] = force_reg (<MODE>mode, operands[1]);
296-
297-
/* Select the operand whose address is checked below: operand 1 when
   operand 0 is a register, otherwise operand 0 (presumably the memory side
   of the move -- confirm against callers).  */
if (s_register_operand (operands[0], <MODE>mode))
298-
adjust_mem = operands[1];
299-
else
300-
adjust_mem = operands[0];
301-
302-
/* Legitimize address: if neon_vector_mem_operand rejects adjust_mem, replace
   its address (XEXP 0) with a Pmode register holding that address.  */
303-
if (!neon_vector_mem_operand (adjust_mem, 2, true))
304-
XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
305-
306-
})
307-
308283
(define_insn "*movmisalign<mode>_neon_store"
309284
[(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
310285
(unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]

gcc/config/arm/vec-common.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,3 +215,26 @@
215215
&& ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
216216
)
217217

218+
(define_expand "movmisalign<mode>"
219+
[(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
220+
(unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
221+
UNSPEC_MISALIGNED_ACCESS))]
222+
"ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
223+
{
224+
/* Expander for misaligned vector moves over the VDQX modes.  Enabled when
   ARM_HAVE_<MODE>_LDST holds for the mode, on little-endian targets with
   unaligned_access.  The preparation code below only massages the operands;
   the (set ... (unspec ...)) template above is what gets emitted.  */
rtx adjust_mem;
225+
/* This pattern is not permitted to fail during expansion: if both arguments
226+
are non-registers (e.g. memory := constant, which can be created by the
227+
auto-vectorizer), force operand 1 into a register. */
228+
if (!s_register_operand (operands[0], <MODE>mode)
229+
&& !s_register_operand (operands[1], <MODE>mode))
230+
operands[1] = force_reg (<MODE>mode, operands[1]);
231+
232+
/* Select the operand whose address is checked below: operand 1 when
   operand 0 is a register, otherwise operand 0 (presumably the memory side
   of the move -- confirm against callers).  */
if (s_register_operand (operands[0], <MODE>mode))
233+
adjust_mem = operands[1];
234+
else
235+
adjust_mem = operands[0];
236+
237+
/* Legitimize address: if neon_vector_mem_operand rejects adjust_mem, replace
   its address (XEXP 0) with a Pmode register holding that address.  */
238+
if (!neon_vector_mem_operand (adjust_mem, 2, true))
239+
XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
240+
})

gcc/testsuite/gcc.target/arm/simd/mve-vneg.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,6 @@ FUNC(f, float, 16, 8, -, vneg)
4747
functions above. */
4848
/* { dg-final { scan-assembler-times {vneg.s[0-9]+ q[0-9]+, q[0-9]+} 6 } } */
4949
/* { dg-final { scan-assembler-times {vneg.f[0-9]+ q[0-9]+, q[0-9]+} 2 } } */
50+
/* { dg-final { scan-assembler-times {vldr[bhw].[0-9]+\tq[0-9]+} 8 } } */
51+
/* { dg-final { scan-assembler-times {vstr[bhw].[0-9]+\tq[0-9]+} 8 } } */
52+
/* { dg-final { scan-assembler-not {orr\tr[0-9]+, r[0-9]+, r[0-9]+} } } */

0 commit comments

Comments
 (0)