[AArch64] LLVM generates unaligned access with -mstrict-align on AArch64

Consider the following command-line:

```
clang -o t.o -c -target aarch64-gnu-linux-eabi -mstrict-align -mcpu=cortex-a55 -O3 t.c 
```

(1) Example 1 ([Godbolt](https://godbolt.org/z/35s8h7Kdz)):

<cut>

<details>

```c
/*
 * Element type of struct2::yyy[].
 *
 * Offsets noted below assume the LP64 AArch64 target from the command line
 * (8-byte pointers, 4-byte unsigned). They agree with the listing: the
 * element stride is 80 (`mov w22, #80`) and `ddd` is read at +32
 * (`ldr x23, [x23, #32]`).
 */
typedef struct
{
  void* ccc[3];
  void* mmm;
  void* ddd;              /* offset 32; func3() treats this as a pointer to another struct1 */
  unsigned eee[2];
  _Bool bbb[8];
  
  _Bool ggg;              /* offset 56 */
  _Bool abs;
  unsigned char kkk[16];  /* offset 58: not a multiple of 4 */
} struct1;

/*
 * Container handed to bug().
 *
 * Assuming LP64 AArch64, the flexible array `yyy` begins at offset 104
 * (24 bytes of pointers, 2 flag bytes, 2 bytes of padding, 76 bytes of
 * `index`). With struct1::kkk at +58, yyy[i].kkk is at 162 + 80 * i from the
 * start of struct2, which is the `#162` displacement seen in the listing.
 */
typedef struct struct2
{
  void* block[3];
  unsigned char pass_flags[2];
  unsigned int index[19];

  struct1 yyy[];          /* flexible array member; bug() indexes it with i in [0, 256) */
} struct2;


/*
 * A 32-bit word overlaid with bit-fields. bug() writes the individual
 * bit-fields and func3() reads the combined value back through `u32`.
 *
 * In the generated code the four 2-bit fields are placed at bit positions
 * 24, 26, 28 and 30 (the shift amounts in .LCPI0_0), above the 24-bit `ggg`.
 */
union union1
{
  unsigned u32;
  struct
  {
    unsigned ggg : 24;
    unsigned ffffffx : 2;
    unsigned ffffffy : 2;
    unsigned ffffffz : 2;
    unsigned ffffffw : 2;
  } aaa;
};

/*
 * Declared here but not defined in this snippet; both remain real calls in
 * the listing (`bl func2`, `bl func1`).
 */
void func2(unsigned value);

unsigned func1(struct2* instr, unsigned yyy);

/*
 * Passes the packed word to func2(), then follows yyy->ddd: while it is
 * non-null, recurses on the pointed-to struct1 with an all-zero word.
 * In the listing this recursion appears as the loop at .LBB0_6, which calls
 * func2(0) and reloads `ddd` from offset 32.
 */
static void func3(struct1* yyy, union union1 zzzz)
{
  func2(zzzz.u32);
  if (yyy->ddd)
  {
    /* Shadows the parameter of the same name; the recursive call always gets zero. */
    union union1 zzzz = {0};
    func3(yyy->ddd, zzzz);
  }
}

/* Prototype ahead of the externally visible definition below. */
void bug(struct2* aaa);

/*
 * Reproducer entry point. For each of the first 256 elements of aaa->yyy it
 * packs some byte-sized members into a union1 and hands it to func3().
 *
 * The four consecutive byte reads kkk[0..3] are what the compiler merges
 * into the single 4-byte `ldur s0, [x8, #162]` in the listing, even though
 * `kkk` is an unsigned char array at an offset that is not a multiple of 4.
 */
void bug(struct2* aaa)
{
  for (unsigned i = 0; i < 256; i++)
  {
    unsigned a = func1(aaa, i);
    /* Not initialized: when a > 4 the four 2-bit fields are never written. */
    union union1 yyy;

    yyy.aaa.ggg = aaa->yyy[i].ggg;

    if (a <= 4)
    {
      /* Each source byte is truncated to the 2-bit width of its destination field. */
      yyy.aaa.ffffffx = aaa->yyy[i].kkk[0];
      yyy.aaa.ffffffy = aaa->yyy[i].kkk[1];
      yyy.aaa.ffffffz = aaa->yyy[i].kkk[2];
      yyy.aaa.ffffffw = aaa->yyy[i].kkk[3];
    }

    func3(&aaa->yyy[i], yyy);
  }
}
```

</details>

</cut>

Output:

<cut>

<details>

```
.LCPI0_0:
        .word   24                              // 0x18
        .word   26                              // 0x1a
        .word   28                              // 0x1c
        .word   30                              // 0x1e
bug:                                    // @bug
        sub     sp, sp, #80
        stp     x29, x30, [sp, #16]             // 16-byte Folded Spill
        str     x23, [sp, #32]                  // 8-byte Folded Spill
        stp     x22, x21, [sp, #48]             // 16-byte Folded Spill
        stp     x20, x19, [sp, #64]             // 16-byte Folded Spill
        add     x29, sp, #16
        mov     x19, x0
        mov     x20, xzr
        mov     w22, #80                        // =0x50
        adrp    x8, .LCPI0_0
        ldr     q0, [x8, :lo12:.LCPI0_0]
        str     q0, [sp]                        // 16-byte Folded Spill
        b       .LBB0_2
.LBB0_1:                                //   in Loop: Header=BB0_2 Depth=1
        add     x20, x20, #1
        cmp     x20, #256
        b.eq    .LBB0_7
.LBB0_2:                                // =>This Loop Header: Depth=1
        mov     x0, x19
        mov     w1, w20
        bl      func1
        cmp     w0, #4
        b.hi    .LBB0_4
        madd    x8, x20, x22, x19
        ldr     q1, [sp]                        // 16-byte Folded Reload
        ldur    s0, [x8, #162]
        ushll   v0.8h, v0.8b, #0
        bic     v0.4h, #252
        ushll   v0.4s, v0.4h, #0
        ushl    v0.4s, v0.4s, v1.4s
        ext     v1.16b, v0.16b, v0.16b, #8
        orr     v0.8b, v0.8b, v1.8b
        fmov    x8, d0
        lsr     x9, x8, #32
        orr     w8, w8, w9
        b       .LBB0_5
.LBB0_4:                                //   in Loop: Header=BB0_2 Depth=1
        and     w8, w21, #0xff000000
.LBB0_5:                                //   in Loop: Header=BB0_2 Depth=1
        madd    x9, x20, x22, x19
        ldrb    w9, [x9, #160]
        orr     w21, w8, w9
        mov     w0, w21
        bl      func2
        madd    x8, x20, x22, x19
        ldr     x23, [x8, #136]
        cbz     x23, .LBB0_1
.LBB0_6:                                //   Parent Loop BB0_2 Depth=1
        mov     w0, wzr
        bl      func2
        ldr     x23, [x23, #32]
        cbnz    x23, .LBB0_6
        b       .LBB0_1
.LBB0_7:
        ldp     x20, x19, [sp, #64]             // 16-byte Folded Reload
        ldp     x22, x21, [sp, #48]             // 16-byte Folded Reload
        ldp     x29, x30, [sp, #16]             // 16-byte Folded Reload
        ldr     x23, [sp, #32]                  // 8-byte Folded Reload
        add     sp, sp, #80
        ret
```

</details>

</cut>

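Note `ldur s0, [x8, #162]`: this is a 4-byte load of `kkk[0..3]`, but `kkk` is an `unsigned char` array at an offset that is not a multiple of 4, so the address is not 4-byte aligned.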

(2) Example 2 ([Godbolt](https://godbolt.org/z/nsovb6s1f)):

<cut>

<details>

```c
/*
 * Writes the negation of q[0..3] into p[0..3], one char at a time.
 * Neither pointer carries any alignment beyond that of `char`, yet the
 * listing for this function loads all four source bytes with a single
 * `ldr s1, [x1]`.
 */
void f(char p[restrict], char *q)
{
    for (int i = 0; i < 4; i++)
        p[i] = -q[i];
}

/*
 * Copies q[0..3] into p[0..3], one char at a time.
 * Serves as the contrast case: the listing for this function uses only
 * byte loads and stores (`ldrb` / `strb`).
 */
void g(char p[restrict], char *q)
{
    for (int i = 0; i < 4; i++)
        p[i] = q[i];
}
```

</details>

</cut>

Output:

<cut>

<details>

```
f:                                      // @f
        movi    v0.2d, #0000000000000000
        ldr     s1, [x1]
        usubw   v0.8h, v0.8h, v1.8b
        umov    w8, v0.h[3]
        umov    w9, v0.h[2]
        umov    w10, v0.h[1]
        umov    w11, v0.h[0]
        strb    w8, [x0, #3]
        strb    w9, [x0, #2]
        strb    w10, [x0, #1]
        strb    w11, [x0]
        ret
g:                                      // @g
        ldrb    w8, [x1, #3]
        ldrb    w9, [x1, #2]
        ldrb    w10, [x1, #1]
        ldrb    w11, [x1]
        strb    w8, [x0, #3]
        strb    w9, [x0, #2]
        strb    w10, [x0, #1]
        strb    w11, [x0]
        ret
```

</details>
</cut>

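Note `ldr s1, [x1]`: this is a 4-byte load through `q`, which is a `char *` and therefore not guaranteed to be 4-byte aligned.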

Tested with clang 18.1.8.

This results in an unaligned access exception being raised on targets that have strict alignment checking enabled in hardware.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64] LLVM generates unaligned access with -mstrict-align on AArch64 #95811

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[AArch64] LLVM generates unaligned access with -mstrict-align on AArch64 #95811

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions