Skip to content

Commit 011f5e9

Browse files
author
Christophe Lyon
committed
arm: Improve thumb1_gen_const_int
Enable thumb1_gen_const_int to generate RTL or asm depending on the
context, so that we avoid duplicating code to handle constants in
Thumb-1 with -mpure-code.

Use a template so that the algorithm is effectively shared, and
rely on two classes to handle the actual emission as RTL or asm.

The generated sequence is improved to handle right-shiftable and small
values with less instructions. We now generate:

128:
        movs    r0, r0, #128
264:
        movs    r3, #33
        lsls    r3, #3
510:
        movs    r3, #255
        lsls    r3, #1
512:
        movs    r3, #1
        lsls    r3, #9
764:
        movs    r3, #191
        lsls    r3, #2
65536:
        movs    r3, #1
        lsls    r3, #16
0x123456:
        movs    r3, #18 ;0x12
        lsls    r3, #8
        adds    r3, #52 ;0x34
        lsls    r3, #8
        adds    r3, #86 ;0x56
0x1123456:
        movs    r3, #137 ;0x89
        lsls    r3, #8
        adds    r3, #26 ;0x1a
        lsls    r3, #8
        adds    r3, #43 ;0x2b
        lsls    r3, #1
0x1000010:
        movs    r3, #16
        lsls    r3, #16
        adds    r3, #1
        lsls    r3, #4
0x1000011:
        movs    r3, #1
        lsls    r3, #24
        adds    r3, #17
-8192:
        movs    r3, #1
        lsls    r3, #13
        rsbs    r3, #0

The patch adds a testcase which does not fully exercise
thumb1_gen_const_int, as other existing patterns already catch small
constants. These parts of thumb1_gen_const_int are used by
arm_thumb1_mi_thunk.

2020-11-02  Christophe Lyon  <[email protected]>

gcc/
* config/arm/arm.c (thumb1_const_rtl, thumb1_const_print): New classes.
(thumb1_gen_const_int): Rename to ...
(thumb1_gen_const_int_1): ... New helper function. Add capability to
emit either RTL or asm, improve generated code.
(thumb1_gen_const_int_rtl): New function.
* config/arm/arm-protos.h (thumb1_gen_const_int): Rename to
thumb1_gen_const_int_rtl.
* config/arm/thumb1.md: Call thumb1_gen_const_int_rtl instead of
thumb1_gen_const_int.

gcc/testsuite/
* gcc.target/arm/pure-code/no-literal-pool-m0.c: New.
1 parent 79680c1 commit 011f5e9

File tree

4 files changed

+369
-34
lines changed

4 files changed

+369
-34
lines changed

gcc/config/arm/arm-protos.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ extern bool arm_small_register_classes_for_mode_p (machine_mode);
7474
extern int const_ok_for_arm (HOST_WIDE_INT);
7575
extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
7676
extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
77-
extern void thumb1_gen_const_int (rtx, HOST_WIDE_INT);
77+
extern void thumb1_gen_const_int_rtl (rtx, HOST_WIDE_INT);
7878
extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
7979
HOST_WIDE_INT, rtx, rtx, int);
8080
extern int legitimate_pic_operand_p (rtx);

gcc/config/arm/arm.c

Lines changed: 192 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4528,38 +4528,6 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
45284528
}
45294529
}
45304530

4531-
/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
4532-
Avoid generating useless code when one of the bytes is zero. */
/* OP0 is the destination handed to every emit_set_insn call below.
   OP1 is the constant; its four low-order bytes are consumed from
   the most significant one down.  */
4533-
void
4534-
thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
4535-
{
4536-
bool mov_done_p = false;
4537-
int i;
4538-
4539-
/* Emit upper 3 bytes if needed. */
4540-
for (i = 0; i < 3; i++)
4541-
{
4542-
int byte = (op1 >> (8 * (3 - i))) & 0xff;
4543-
4544-
if (byte)
4545-
{
/* The first nonzero byte is set directly; later ones are added
   to the value already held in OP0.  */
4546-
emit_set_insn (op0, mov_done_p
4547-
? gen_rtx_PLUS (SImode,op0, GEN_INT (byte))
4548-
: GEN_INT (byte));
4549-
mov_done_p = true;
4550-
}
4551-
4552-
/* Once something has been emitted, one shift by 8 is emitted for
   every remaining upper byte position, including positions whose
   byte is zero.  */
if (mov_done_p)
4553-
emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
4554-
}
4555-
4556-
/* Emit lower byte if needed. */
4557-
if (!mov_done_p)
4558-
emit_set_insn (op0, GEN_INT (op1 & 0xff));
4559-
else if (op1 & 0xff)
4560-
emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
4561-
}
4562-
45634531
/* Emit a sequence of insns to handle a large constant.
45644532
CODE is the code of the operation required, it can be any of SET, PLUS,
45654533
IOR, AND, XOR, MINUS;
@@ -28263,6 +28231,198 @@ arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
2826328231
default_internal_label (stream, prefix, labelno);
2826428232
}
2826528233

28234+
/* Define classes to generate code as RTL or output asm to a file.
28235+
Using templates then allows to use the same code to output code
28236+
sequences in the two formats. */
/* thumb1_const_rtl is the RTL flavour: every member function emits
   one set of DST through emit_set_insn.  */
28237+
class thumb1_const_rtl
28238+
{
28239+
public:
28240+
/* DST is the rtx that all emitted sets write to.  */
thumb1_const_rtl (rtx dst) : dst (dst) {}
28241+
28242+
/* Emit DST = VAL.  */
void mov (HOST_WIDE_INT val)
28243+
{
28244+
emit_set_insn (dst, GEN_INT (val));
28245+
}
28246+
28247+
/* Emit DST = DST + VAL, as an SImode PLUS.  */
void add (HOST_WIDE_INT val)
28248+
{
28249+
emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28250+
}
28251+
28252+
/* Emit DST = DST << SHIFT, as an SImode ASHIFT.  */
void ashift (HOST_WIDE_INT shift)
28253+
{
28254+
emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28255+
}
28256+
28257+
/* Emit DST = -DST, as an SImode NEG.  */
void neg ()
28258+
{
28259+
emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28260+
}
28261+
28262+
private:
28263+
/* Destination of every emitted set.  */
rtx dst;
28264+
};
28265+
28266+
/* Textual flavour of the constant emitter: every member function
   writes one assembly line to a FILE through asm_fprintf, using the
   register name looked up at construction time.  */
class thumb1_const_print
28267+
{
28268+
public:
28269+
/* F is the stream written to; REGNO indexes reg_names to obtain the
   destination register name printed in every instruction.  */
thumb1_const_print (FILE *f, int regno)
28270+
{
28271+
t_file = f;
28272+
dst_regname = reg_names[regno];
28273+
}
28274+
28275+
/* Print "movs <reg>, #VAL".  */
void mov (HOST_WIDE_INT val)
28276+
{
28277+
asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28278+
dst_regname, val);
28279+
}
28280+
28281+
/* Print "adds <reg>, #VAL".  */
void add (HOST_WIDE_INT val)
28282+
{
28283+
asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28284+
dst_regname, val);
28285+
}
28286+
28287+
/* Print "lsls <reg>, #SHIFT".  */
void ashift (HOST_WIDE_INT shift)
28288+
{
28289+
asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28290+
dst_regname, shift);
28291+
}
28292+
28293+
/* Print "rsbs <reg>, #0".  */
void neg ()
28294+
{
28295+
asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28296+
}
28297+
28298+
private:
28299+
/* Stream receiving the assembly text.  */
FILE *t_file;
28300+
/* Name of the destination register, taken from reg_names.  */
const char *dst_regname;
28301+
};
28302+
28303+
/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28304+
Avoid generating useless code when one of the bytes is zero. */
/* DST is an emitter object: T must provide the mov, add, ashift and
   neg member functions called below (thumb1_const_rtl and
   thumb1_const_print both do).  DST is taken by value, so the
   recursive call used for negated constants works on a copy.
   OP1 is the constant to build; it is asserted to be left unchanged
   by trunc_int_for_mode (OP1, SImode).  */
28305+
template <class T>
28306+
void
28307+
thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28308+
{
28309+
bool mov_done_p = false;
28310+
unsigned HOST_WIDE_INT val = op1;
28311+
int shift = 0;
28312+
int i;
28313+
28314+
gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28315+
28316+
/* A value of at most 255 is emitted as a single mov.  */
if (val <= 255)
28317+
{
28318+
dst.mov (val);
28319+
return;
28320+
}
28321+
28322+
/* For negative numbers with the first nine bits set, build the
28323+
opposite of OP1, then negate it, it's generally shorter and not
28324+
longer. */
28325+
if ((val & 0xFF800000) == 0xFF800000)
28326+
{
28327+
thumb1_gen_const_int_1 (dst, -op1);
28328+
dst.neg ();
28329+
return;
28330+
}
28331+
28332+
/* In the general case, we need 7 instructions to build
28333+
a 32 bits constant (1 movs, 3 lsls, 3 adds). We can
28334+
do better if VAL is small enough, or
28335+
right-shiftable by a suitable amount. If the
28336+
right-shift enables to encode at least one less byte,
28337+
it's worth it: we save a adds and a lsls at the
28338+
expense of a final lsls. */
28339+
int final_shift = number_of_first_bit_set (val);
28340+
28341+
/* NOTE(review): VAL keeps the full width of HOST_WIDE_INT and is not
   masked to 32 bits.  If OP1 can be a negative value that fails the
   0xFF800000 test above (bit 31 set, some of bits 23..30 clear),
   clz_hwi presumably returns 0, so any FINAL_SHIFT of 8 or more
   would look profitable and shift high-order one bits into the
   bytes emitted below -- confirm this is intended.  */
int leading_zeroes = clz_hwi (val);
28342+
int number_of_bytes_needed
28343+
= ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28344+
/ BITS_PER_UNIT) + 1;
28345+
int number_of_bytes_needed2
28346+
= ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28347+
/ BITS_PER_UNIT) + 1;
28348+
28349+
/* Keep the right shift only when it reduces the byte count;
   otherwise FINAL_SHIFT is reset so no trailing lsls is emitted.  */
if (number_of_bytes_needed2 < number_of_bytes_needed)
28350+
val >>= final_shift;
28351+
else
28352+
final_shift = 0;
28353+
28354+
/* If we are in a very small range, we can use either a single movs
28355+
or movs+adds. */
28356+
if (val <= 510)
28357+
{
28358+
if (val > 255)
28359+
{
28360+
/* VAL is in 256..510 here, so HIGH is in 1..255 and both the mov
   and the add operand are at most 255.  */
unsigned HOST_WIDE_INT high = val - 255;
28361+
28362+
dst.mov (high);
28363+
dst.add (255);
28364+
}
28365+
else
28366+
dst.mov (val);
28367+
28368+
if (final_shift > 0)
28369+
dst.ashift (final_shift);
28370+
}
28371+
else
28372+
{
28373+
/* General case, emit upper 3 bytes as needed. */
28374+
for (i = 0; i < 3; i++)
28375+
{
28376+
unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28377+
28378+
if (byte)
28379+
{
28380+
/* We are about to emit new bits, stop accumulating a
28381+
shift amount, and left-shift only if we have already
28382+
emitted some upper bits. */
28383+
if (mov_done_p)
28384+
{
28385+
dst.ashift (shift);
28386+
dst.add (byte);
28387+
}
28388+
else
28389+
dst.mov (byte);
28390+
28391+
/* Stop accumulating shift amount since we've just
28392+
emitted some bits. */
28393+
shift = 0;
28394+
28395+
mov_done_p = true;
28396+
}
28397+
28398+
/* Each byte position passed after the first emitted byte adds 8
   to the pending shift, so runs of zero bytes become one lsls.  */
if (mov_done_p)
28399+
shift += 8;
28400+
}
28401+
28402+
/* Emit lower byte. */
28403+
if (!mov_done_p)
28404+
dst.mov (val & 0xff);
28405+
else
28406+
{
28407+
/* The pending shift is emitted even when the low byte is zero;
   only the add is skipped in that case.  */
dst.ashift (shift);
28408+
if (val & 0xff)
28409+
dst.add (val & 0xff);
28410+
}
28411+
28412+
/* Undo the right shift applied to VAL before the bytes were
   emitted.  */
if (final_shift > 0)
28413+
dst.ashift (final_shift);
28414+
}
28415+
}
28416+
28417+
/* Proxy for thumb1.md, since the thumb1_const_print and
28418+
thumb1_const_rtl classes are not exported. */
/* DST is wrapped in a thumb1_const_rtl emitter and OP1 is built into
   it by thumb1_gen_const_int_1, i.e. the sequence is emitted as
   RTL.  */
28419+
void
28420+
thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28421+
{
28422+
thumb1_const_rtl t (dst);
28423+
thumb1_gen_const_int_1 (t, op1);
28424+
}
28425+
2826628426
/* Output code to add DELTA to the first argument, and then jump
2826728427
to FUNCTION. Used for C++ multiple inheritance. */
2826828428

gcc/config/arm/thumb1.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@
820820
&& !satisfies_constraint_K (operands[1])"
821821
[(clobber (const_int 0))]
822822
"
823-
thumb1_gen_const_int (operands[0], INTVAL (operands[1]));
823+
thumb1_gen_const_int_rtl (operands[0], INTVAL (operands[1]));
824824
DONE;
825825
"
826826
)

0 commit comments

Comments
 (0)