-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Closed
Labels
Description
Consider the following command-line:
clang -o t.o -c -target aarch64-gnu-linux-eabi -mstrict-align -mcpu=cortex-a55 -O3 t.c
(1) Example 1 ([Godbolt](https://godbolt.org/z/35s8h7Kdz)):
/* Per-element record of the reproducer. bug() walks a flexible array of
   these; the code generated for the AArch64 target shown in this report
   indexes that array with a stride of 80 bytes. */
typedef struct
{
void* ccc[3];
void* mmm;
void* ddd; /* tested for non-null and followed by func3() */
unsigned eee[2];
_Bool bbb[8];
_Bool ggg; /* copied into the 24-bit bit-field union1.aaa.ggg by bug() */
_Bool abs;
unsigned char kkk[16]; /* kkk[0..3] feed the four 2-bit fields in bug(); element type only requires 1-byte alignment */
} struct1;
/* Container handed to bug(); it ends in a flexible array of struct1. */
typedef struct struct2
{
void* block[3];
unsigned char pass_flags[2];
unsigned int index[19];
struct1 yyy[]; /* flexible array member; bug() indexes elements 0..255 */
} struct2;
/* A 32-bit word that can be accessed whole (u32) or as packed bit-fields (aaa). */
union union1
{
unsigned u32; /* the whole word; this is what func3() passes to func2() */
struct
{
unsigned ggg : 24;
unsigned ffffffx : 2;
unsigned ffffffy : 2;
unsigned ffffffz : 2;
unsigned ffffffw : 2;
} aaa; /* 24 + 2 + 2 + 2 + 2 = 32 bits in total */
};
/* Declared only: no definition of func2 or func1 appears in this reproducer,
   so the compiler has to emit real calls to them. */
void func2(unsigned value);
unsigned func1(struct2* instr, unsigned yyy);
/* Passes zzzz.u32 to func2(), then, while yyy->ddd is non-null, recurses on
   yyy->ddd with an all-zero union. */
static void func3(struct1* yyy, union union1 zzzz)
{
func2(zzzz.u32);
if (yyy->ddd)
{
/* This inner zzzz shadows the parameter; the recursive call always receives 0. */
union union1 zzzz = {0};
func3(yyy->ddd, zzzz);
}
}
/* Reproducer entry point. For each i in 0..255 it calls func1(aaa, i), packs
   fields of aaa->yyy[i] into a union1 and passes the result to func3(). */
void bug(struct2* aaa);
void bug(struct2* aaa)
{
for (unsigned i = 0; i < 256; i++)
{
unsigned a = func1(aaa, i);
/* yyy has no initializer; only the bit-fields assigned below are set. */
union union1 yyy;
yyy.aaa.ggg = aaa->yyy[i].ggg;
if (a <= 4)
{
/* Four adjacent single-byte reads (kkk[0]..kkk[3]); in the output quoted in
   this report they appear as the single 4-byte load "ldur s0, [x8, #162]". */
yyy.aaa.ffffffx = aaa->yyy[i].kkk[0];
yyy.aaa.ffffffy = aaa->yyy[i].kkk[1];
yyy.aaa.ffffffz = aaa->yyy[i].kkk[2];
yyy.aaa.ffffffw = aaa->yyy[i].kkk[3];
}
/* When a > 4 the four 2-bit fields are passed on without having been assigned. */
func3(&aaa->yyy[i], yyy);
}
}
Output:
// Constant pool: per-lane left-shift amounts 24/26/28/30 used by the ushl
// below, i.e. the bit positions of the four 2-bit fields that follow the
// 24-bit ggg field in union1.
.LCPI0_0:
.word 24 // 0x18
.word 26 // 0x1a
.word 28 // 0x1c
.word 30 // 0x1e
// Compiler output for bug(struct2 *aaa), quoted verbatim.
// Register roles as used below:
//   x19 = aaa (copied from x0)        x20 = loop index i (0..255)
//   w22 = 80, the multiplier applied to i when addressing aaa->yyy[i]
//   w21 = packed union value passed to func2, kept across iterations
//   x23 = pointer followed through offset 32 in the .LBB0_6 loop
//   [sp] = spilled copy of the .LCPI0_0 shift vector
bug: // @bug
sub sp, sp, #80
stp x29, x30, [sp, #16] // 16-byte Folded Spill
str x23, [sp, #32] // 8-byte Folded Spill
stp x22, x21, [sp, #48] // 16-byte Folded Spill
stp x20, x19, [sp, #64] // 16-byte Folded Spill
add x29, sp, #16
mov x19, x0
mov x20, xzr
mov w22, #80 // =0x50
adrp x8, .LCPI0_0
ldr q0, [x8, :lo12:.LCPI0_0]
// The shift vector is kept on the stack and reloaded after each call to func1.
str q0, [sp] // 16-byte Folded Spill
b .LBB0_2
// Loop latch: i++ and exit once i == 256.
.LBB0_1: // in Loop: Header=BB0_2 Depth=1
add x20, x20, #1
cmp x20, #256
b.eq .LBB0_7
.LBB0_2: // =>This Loop Header: Depth=1
mov x0, x19
mov w1, w20
bl func1
// a > 4 (unsigned) skips the kkk[] packing.
cmp w0, #4
b.hi .LBB0_4
// x8 = aaa + i*80
madd x8, x20, x22, x19
ldr q1, [sp] // 16-byte Folded Reload
// The load this report is about: one 4-byte access covering kkk[0..3] at
// offset 162, which is not a multiple of 4.
ldur s0, [x8, #162]
// Widen the four bytes, keep the low 2 bits of each (bic #252), shift every
// lane to its field position and OR the four lanes together into w8.
ushll v0.8h, v0.8b, #0
bic v0.4h, #252
ushll v0.4s, v0.4h, #0
ushl v0.4s, v0.4s, v1.4s
ext v1.16b, v0.16b, v0.16b, #8
orr v0.8b, v0.8b, v1.8b
fmov x8, d0
lsr x9, x8, #32
orr w8, w8, w9
b .LBB0_5
.LBB0_4: // in Loop: Header=BB0_2 Depth=1
// a > 4 path: bits 24..31 are taken from whatever w21 currently holds; the
// only write to w21 in this function is the orr in .LBB0_5.
and w8, w21, #0xff000000
.LBB0_5: // in Loop: Header=BB0_2 Depth=1
madd x9, x20, x22, x19
// Byte load at offset 160 supplies the low bits (the ggg value).
ldrb w9, [x9, #160]
orr w21, w8, w9
mov w0, w21
bl func2
madd x8, x20, x22, x19
// Pointer at offset 136 is tested for null; this matches func3's yyy->ddd check.
ldr x23, [x8, #136]
cbz x23, .LBB0_1
// func3's recursion appears here as a loop: call func2(0), then follow the
// pointer at offset 32 until it is null.
.LBB0_6: // Parent Loop BB0_2 Depth=1
mov w0, wzr
bl func2
ldr x23, [x23, #32]
cbnz x23, .LBB0_6
b .LBB0_1
// Epilogue: restore the saved registers and release the 80-byte frame.
.LBB0_7:
ldp x20, x19, [sp, #64] // 16-byte Folded Reload
ldp x22, x21, [sp, #48] // 16-byte Folded Reload
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
ldr x23, [sp, #32] // 8-byte Folded Reload
add sp, sp, #80
ret
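Note the `ldur s0, [x8, #162]`: it is a single 4-byte load covering `kkk[0..3]`, at an offset (162) that is not a multiple of 4, so the access is not 4-byte aligned even though `-mstrict-align` was passed.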
(2) Example 2 ([Godbolt](https://godbolt.org/z/nsovb6s1f)):
/* Stores the negation of each of the first four bytes of q into p.
   Both pointers are plain char pointers, so nothing here promises more than
   1-byte alignment. */
void f(char p[restrict], char *q)
{
for (int i = 0; i < 4; i++)
p[i] = -q[i];
}
/* Copies the first four bytes of q into p; same signature as f() but without
   the negation, included for comparison. */
void g(char p[restrict], char *q)
{
for (int i = 0; i < 4; i++)
p[i] = q[i];
}
Output:
// Compiler output for f(p, q), quoted verbatim: x0 = p, x1 = q.
f: // @f
movi v0.2d, #0000000000000000
// The load this report is about: q[0..3] fetched with one 4-byte access.
ldr s1, [x1]
// 0 - q[i], widened to 16-bit lanes.
usubw v0.8h, v0.8h, v1.8b
umov w8, v0.h[3]
umov w9, v0.h[2]
umov w10, v0.h[1]
umov w11, v0.h[0]
// The results are written back one byte at a time; only the load was widened.
strb w8, [x0, #3]
strb w9, [x0, #2]
strb w10, [x0, #1]
strb w11, [x0]
ret
// Compiler output for g(p, q), quoted verbatim: x0 = p, x1 = q.
// Every access here is a single-byte ldrb/strb; no wider load is emitted.
g: // @g
ldrb w8, [x1, #3]
ldrb w9, [x1, #2]
ldrb w10, [x1, #1]
ldrb w11, [x1]
strb w8, [x0, #3]
strb w9, [x0, #2]
strb w10, [x0, #1]
strb w11, [x0]
ret
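Note the `ldr s1, [x1]` in `f`: it is a single 4-byte load through `q`, a `char *` that is only guaranteed to be 1-byte aligned.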
Tested on 18.1.8.
This results in an unaligned-access exception being raised on targets that have strict alignment checking enabled in hardware.