Skip to content

!struct.x && !struct.y produces worse code than (struct.x == 0) && (struct.y == 0) #128778

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Kmeakin opened this issue Feb 25, 2025 · 3 comments · Fixed by #128861
Closed

!struct.x && !struct.y produces worse code than (struct.x == 0) && (struct.y == 0) #128778

Kmeakin opened this issue Feb 25, 2025 · 3 comments · Fixed by #128861

Comments

@Kmeakin
Copy link
Contributor

Kmeakin commented Feb 25, 2025

`!it.x && !it.y` produces worse code than `(it.x == 0) && (it.y == 0)` if `it` is a struct type with `bool` members `x` and `y`.

Oddly, using `uint8_t` for `x` and `y` does not suffer from this problem.

This affects the AArch64, x86-64, and RISC-V targets.

Real-world motivation

This union from mimalloc:

// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
// NOTE(review): the `full_aligned == 0` test presumably relies on the bits of
// the byte not covered by the two 1-bit fields being zero -- confirm against
// how mimalloc initializes this union.
typedef union mi_page_flags_s {
  // Whole-byte view; as a union member it shares storage with `x`.
  uint8_t full_aligned;
  struct {
    // Single-bit flag.
    uint8_t in_full : 1;
    // Single-bit flag.
    uint8_t has_aligned : 1;
  } x;
} mi_page_flags_t;

C++ code

https://godbolt.org/z/1387M789s

#include <cstdint>

// Case 1: two ordinary `bool` members (no bit-fields).
// In the AArch64 listing in this report, src1 (`!it.x && !it.y`) is longer
// than tgt1 (`== 0` spelling) for this type.
struct S1 {
    bool x;
    bool y;
};

// Case 2: two 1-bit `bool` bit-fields.
// In the AArch64 listing in this report, src2 is likewise longer than tgt2.
struct S2 {
    bool x : 1;
    bool y : 1;
};

// Case 3: two 1-bit `uint8_t` bit-fields.
// In the AArch64 listing in this report, src3 and tgt3 are identical.
struct S3 {
    uint8_t x : 1;
    uint8_t y : 1;
};

// Case 4: two ordinary `uint8_t` members (no bit-fields).
// In the AArch64 listing in this report, src4 and tgt4 are identical.
struct S4 {
    uint8_t x ;
    uint8_t y ;
};

// C language linkage: the functions are emitted under their plain names
// (the assembly listing in this report uses the labels src1, tgt1, ...).
extern "C" {
// Each srcN/tgtN pair asks "are both members of SN false/zero?".
//   srcN spells the test with logical negation:        !it.x && !it.y
//   tgtN spells the test as a comparison against zero: (it.x == 0) && (it.y == 0)
// The exact spelling is the point of the reproducer; do not normalize it.

// S1 (plain bool): the listing shows different code for src1 and tgt1.
auto src1(S1 it) -> bool { return !it.x && !it.y; }
auto tgt1(S1 it) -> bool { return (it.x == 0) && (it.y == 0); }

// S2 (bool bit-fields): the listing shows different code for src2 and tgt2.
auto src2(S2 it) -> bool { return !it.x && !it.y; }
auto tgt2(S2 it) -> bool { return (it.x == 0) && (it.y == 0); }

// S3 (uint8_t bit-fields): the listing shows identical code for both.
auto src3(S3 it) -> bool { return !it.x && !it.y; }
auto tgt3(S3 it) -> bool { return (it.x == 0) && (it.y == 0); }

// S4 (plain uint8_t): the listing shows identical code for both.
auto src4(S4 it) -> bool { return !it.x && !it.y; }
auto tgt4(S4 it) -> bool { return (it.x == 0) && (it.y == 0); }
}

AArch64 assembly

src1:
        tst     x0, #0x100
        eor     w9, w0, #0x1
        cset    w8, eq
        and     w0, w8, w9
        ret

tgt1:
        mov     w8, #257
        tst     x0, x8
        cset    w0, eq
        ret

src2:
        tst     x0, #0x2
        eor     w9, w0, #0x1
        cset    w8, eq
        and     w0, w8, w9
        ret

tgt2:
        tst     x0, #0x3
        cset    w0, eq
        ret

src3:
        tst     x0, #0x3
        cset    w0, eq
        ret

tgt3:
        tst     x0, #0x3
        cset    w0, eq
        ret

src4:
        tst     x0, #0xffff
        cset    w0, eq
        ret

tgt4:
        tst     x0, #0xffff
        cset    w0, eq
        ret

Alive proof

https://alive2.llvm.org/ce/z/JRwXu7

----------------------------------------
define i1 @src1(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = trunc i64 %#0 to i1
  %#3 = and i64 %#0, 256
  %#4 = icmp eq i64 %#3, 0
  %#5 = xor i1 %#2, 1
  %#6 = and i1 %#4, %#5
  ret i1 %#6
}
=>
define i1 @tgt1(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 257
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!


----------------------------------------
define i1 @src2(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = trunc i64 %#0 to i1
  %#3 = and i64 %#0, 2
  %#4 = icmp eq i64 %#3, 0
  %#5 = xor i1 %#2, 1
  %#6 = and i1 %#4, %#5
  ret i1 %#6
}
=>
define i1 @tgt2(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 3
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!


----------------------------------------
define i1 @src3(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 3
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
=>
define i1 @tgt3(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 3
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!


----------------------------------------
define i1 @src4(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 65535
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
=>
define i1 @tgt4(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 65535
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!

Summary:
  4 correct transformations
  0 incorrect transformations
  0 failed-to-prove transformations
  0 Alive2 errors
@llvmbot
Copy link
Member

llvmbot commented Feb 25, 2025

@llvm/issue-subscribers-backend-aarch64

Author: Karl Meakin (Kmeakin)

`!it.x && !it.y` produces worse code than `(it.x == 0) && (it.y == 0)` if `it` is a struct type with `bool` members `x` and `y`.

Oddly, using `uint8_t` for `x` and `y` does not suffer from this problem.

Real-world motivation

This union from mimalloc:

// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
// NOTE(review): the `full_aligned == 0` test presumably relies on the bits of
// the byte not covered by the two 1-bit fields being zero -- confirm against
// how mimalloc initializes this union.
typedef union mi_page_flags_s {
  // Whole-byte view; as a union member it shares storage with `x`.
  uint8_t full_aligned;
  struct {
    // Single-bit flag.
    uint8_t in_full : 1;
    // Single-bit flag.
    uint8_t has_aligned : 1;
  } x;
} mi_page_flags_t;

C++ code

https://godbolt.org/z/1387M789s

#include <cstdint>

// Case 1: two ordinary `bool` members (no bit-fields).
// In the AArch64 listing in this report, src1 (`!it.x && !it.y`) is longer
// than tgt1 (`== 0` spelling) for this type.
struct S1 {
    bool x;
    bool y;
};

// Case 2: two 1-bit `bool` bit-fields.
// In the AArch64 listing in this report, src2 is likewise longer than tgt2.
struct S2 {
    bool x : 1;
    bool y : 1;
};

// Case 3: two 1-bit `uint8_t` bit-fields.
// In the AArch64 listing in this report, src3 and tgt3 are identical.
struct S3 {
    uint8_t x : 1;
    uint8_t y : 1;
};

// Case 4: two ordinary `uint8_t` members (no bit-fields).
// In the AArch64 listing in this report, src4 and tgt4 are identical.
struct S4 {
    uint8_t x ;
    uint8_t y ;
};

extern "C" {
auto src1(S1 it) -&gt; bool { return !it.x &amp;&amp; !it.y; }
auto tgt1(S1 it) -&gt; bool { return (it.x == 0) &amp;&amp; (it.y == 0); }

auto src2(S2 it) -&gt; bool { return !it.x &amp;&amp; !it.y; }
auto tgt2(S2 it) -&gt; bool { return (it.x == 0) &amp;&amp; (it.y == 0); }

auto src3(S3 it) -&gt; bool { return !it.x &amp;&amp; !it.y; }
auto tgt3(S3 it) -&gt; bool { return (it.x == 0) &amp;&amp; (it.y == 0); }

auto src4(S4 it) -&gt; bool { return !it.x &amp;&amp; !it.y; }
auto tgt4(S4 it) -&gt; bool { return (it.x == 0) &amp;&amp; (it.y == 0); }
}

AArch64 assembly

src1:
        tst     x0, #0x100
        eor     w9, w0, #0x1
        cset    w8, eq
        and     w0, w8, w9
        ret

tgt1:
        mov     w8, #257
        tst     x0, x8
        cset    w0, eq
        ret

src2:
        tst     x0, #0x2
        eor     w9, w0, #0x1
        cset    w8, eq
        and     w0, w8, w9
        ret

tgt2:
        tst     x0, #0x3
        cset    w0, eq
        ret

src3:
        tst     x0, #0x3
        cset    w0, eq
        ret

tgt3:
        tst     x0, #0x3
        cset    w0, eq
        ret

src4:
        tst     x0, #0xffff
        cset    w0, eq
        ret

tgt4:
        tst     x0, #0xffff
        cset    w0, eq
        ret

Alive proof

https://alive2.llvm.org/ce/z/JRwXu7

----------------------------------------
define i1 @src1(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = trunc i64 %#0 to i1
  %#3 = and i64 %#0, 256
  %#4 = icmp eq i64 %#3, 0
  %#5 = xor i1 %#2, 1
  %#6 = and i1 %#4, %#5
  ret i1 %#6
}
=>
define i1 @tgt1(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 257
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!


----------------------------------------
define i1 @src2(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = trunc i64 %#0 to i1
  %#3 = and i64 %#0, 2
  %#4 = icmp eq i64 %#3, 0
  %#5 = xor i1 %#2, 1
  %#6 = and i1 %#4, %#5
  ret i1 %#6
}
=>
define i1 @tgt2(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 3
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!


----------------------------------------
define i1 @src3(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 3
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
=>
define i1 @tgt3(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 3
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!


----------------------------------------
define i1 @src4(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 65535
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
=>
define i1 @tgt4(i64 %#0) nofree willreturn memory(none) {
#1:
  %#2 = and i64 %#0, 65535
  %#3 = icmp eq i64 %#2, 0
  ret i1 %#3
}
Transformation seems to be correct!

Summary:
  4 correct transformations
  0 incorrect transformations
  0 failed-to-prove transformations
  0 Alive2 errors

@Kmeakin Kmeakin changed the title [AArch64] !struct.x && !struct.y produces worse code than (struct.x == 0) && (struct.y == 0) !struct.x && !struct.y produces worse code than (struct.x == 0) && (struct.y == 0)` Feb 25, 2025
@Kmeakin Kmeakin changed the title !struct.x && !struct.y produces worse code than (struct.x == 0) && (struct.y == 0)` !struct.x && !struct.y produces worse code than (struct.x == 0) && (struct.y == 0) Feb 25, 2025
@dtcxzyw
Copy link
Member

dtcxzyw commented Feb 26, 2025

cc @andjo403

@andjo403
Copy link
Contributor

This is the continuation of #122179. I have the code prepared, but it still needs tests, and as I understand it there are also some regressions; that is the reason I have not finished the tests.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging a pull request may close this issue.

4 participants