Skip to content

__bf16 treated like _Float16 under #pragma STDC FENV_ACCESS ON (x86_64 clang 18.1.0) #104465

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
ngbronson-openai opened this issue Aug 15, 2024 · 4 comments
Labels
clang Clang issues not falling into any other category floating-point Floating-point math

Comments

@ngbronson-openai
Copy link

Under STDC FENV_ACCESS ON (or -ffp-exception-behavior=strict), __bf16 <-> float conversions are compiled using the half-precision intrinsics.

My test code

// Reproducer: the same four 16-bit <-> float conversions are defined twice,
// once with FENV_ACCESS ON and once with it OFF, so the code generated for
// the two modes can be compared side by side.
#pragma STDC FENV_ACCESS ON

// __bf16 <-> float under FENV_ACCESS ON: the case this issue reports as
// being compiled like _Float16.
float widenB(__bf16 x) { return x; }
 __bf16 narrowB(float x) { return x; }

// _Float16 <-> float under FENV_ACCESS ON, for comparison.
float widenF(_Float16 x) { return x; }
_Float16 narrowF(float x) { return x; }

#pragma STDC FENV_ACCESS OFF

// The same conversions with FENV_ACCESS OFF (names carry a "2" suffix).
float widenB2(__bf16 x) { return x; }
__bf16 narrowB2(float x) { return x; }
float widenF2(_Float16 x) { return x; }
_Float16 narrowF2(float x) { return x; }

Compiled on godbolt x86-64 clang 18.1.0 with -march=sapphirerapids -std=gnu++2b -O3

# Presumably widenB(__bf16) (name shown mangled), FENV_ACCESS ON,
# -march=sapphirerapids.
# Reported problem: the 16-bit input is widened with vcvtph2ps, the
# half-precision conversion, instead of the shl-by-16 sequence emitted
# for widenB2.
_Z6widenBDF16b:
        vmovw   eax, xmm0
        movzx   eax, ax
        vmovd   xmm0, eax
        # half-precision -> single-precision conversion
        vcvtph2ps       xmm0, xmm0
        ret

# narrowB (float -> __bf16), FENV_ACCESS ON, -march=sapphirerapids.
# Reported problem: the value is narrowed with vcvtps2ph, the
# single -> half-precision conversion, instead of the vcvtneps2bf16
# emitted for narrowB2.
narrowB(float):
        # xmm1 = 0, then keep only element 0 of the input and take the other
        # elements from the zeroed register
        vxorps  xmm1, xmm1, xmm1
        vblendps        xmm0, xmm1, xmm0, 1
        # imm8 = 4: rounding mode is taken from MXCSR
        vcvtps2ph       xmm0, xmm0, 4
        vmovw   eax, xmm0
        vmovw   xmm0, eax
        ret

# widenF (_Float16 -> float), FENV_ACCESS ON, -march=sapphirerapids.
# Single scalar half -> single conversion; identical to widenF2.
widenF(_Float16):
        vcvtsh2ss       xmm0, xmm0, xmm0
        ret

# narrowF (float -> _Float16), FENV_ACCESS ON, -march=sapphirerapids.
# Single scalar single -> half conversion; identical to narrowF2.
narrowF(float):
        vcvtss2sh       xmm0, xmm0, xmm0
        ret

# Presumably widenB2(__bf16) (name shown mangled), FENV_ACCESS OFF,
# -march=sapphirerapids.
# No conversion instruction is used: the 16 input bits are shifted into the
# upper half of a 32-bit value, which is returned in xmm0.
_Z7widenB2DF16b:
        vmovw   eax, xmm0
        shl     eax, 16
        vmovd   xmm0, eax
        ret

# narrowB2 (float -> __bf16), FENV_ACCESS OFF, -march=sapphirerapids.
# Uses the dedicated bf16 conversion instruction, unlike narrowB.
narrowB2(float):
        # single -> bf16 conversion
        vcvtneps2bf16   xmm0, xmm0
        vmovw   eax, xmm0
        vmovw   xmm0, eax
        ret

# widenF2 (_Float16 -> float), FENV_ACCESS OFF, -march=sapphirerapids.
# Same code as widenF.
widenF2(_Float16):
        vcvtsh2ss       xmm0, xmm0, xmm0
        ret

# narrowF2 (float -> _Float16), FENV_ACCESS OFF, -march=sapphirerapids.
# Same code as narrowF.
narrowF2(float):
        vcvtss2sh       xmm0, xmm0, xmm0
        ret

Note that BF16 intrinsics are used only for the versions without FENV_ACCESS.

With broadwell as the target, the same issue occurs (with FENV_ACCESS, __bf16 conversions are compiled as if they were _Float16):

# Presumably widenB(__bf16) (name shown mangled), FENV_ACCESS ON,
# broadwell target.
# Reported problem: instruction-for-instruction the same as widenF in this
# listing, i.e. the input is converted with the half-precision vcvtph2ps.
_Z6widenBDF16b:
        # extract word 0 of xmm0 and move it back as a zero-extended 32-bit value
        vpextrw eax, xmm0, 0
        movzx   eax, ax
        vmovd   xmm0, eax
        # half-precision -> single-precision conversion
        vcvtph2ps       xmm0, xmm0
        ret

# narrowB (float -> __bf16), FENV_ACCESS ON, broadwell target.
# Reported problem: instruction-for-instruction the same as narrowF in this
# listing (vcvtps2ph), whereas narrowB2 calls __truncsfbf2.
narrowB(float):
        # xmm1 = 0, then keep only element 0 of the input and take the other
        # elements from the zeroed register
        vxorps  xmm1, xmm1, xmm1
        vblendps        xmm0, xmm1, xmm0, 1
        # single -> half-precision conversion; imm8 = 4: rounding mode from MXCSR
        vcvtps2ph       xmm0, xmm0, 4
        # insert the 16-bit result into word 0 of xmm0
        vmovd   eax, xmm0
        vpinsrw xmm0, xmm0, eax, 0
        ret

# widenF (_Float16 -> float), FENV_ACCESS ON, broadwell target.
# Same code as widenF2 in this listing.
widenF(_Float16):
        # extract word 0 of xmm0 and move it back as a zero-extended 32-bit value
        vpextrw eax, xmm0, 0
        movzx   eax, ax
        vmovd   xmm0, eax
        # half-precision -> single-precision conversion
        vcvtph2ps       xmm0, xmm0
        ret

# narrowF (float -> _Float16), FENV_ACCESS ON, broadwell target.
# Differs from narrowF2 only by the leading vxorps/vblendps pair.
narrowF(float):
        # xmm1 = 0, then keep only element 0 of the input and take the other
        # elements from the zeroed register
        vxorps  xmm1, xmm1, xmm1
        vblendps        xmm0, xmm1, xmm0, 1
        # single -> half-precision conversion; imm8 = 4: rounding mode from MXCSR
        vcvtps2ph       xmm0, xmm0, 4
        # insert the 16-bit result into word 0 of xmm0
        vmovd   eax, xmm0
        vpinsrw xmm0, xmm0, eax, 0
        ret

# Presumably widenB2(__bf16) (name shown mangled), FENV_ACCESS OFF,
# broadwell target.
# No conversion instruction is used: the 16 input bits are shifted into the
# upper half of a 32-bit value, which is returned in xmm0.
_Z7widenB2DF16b:
        vpextrw eax, xmm0, 0
        shl     eax, 16
        vmovd   xmm0, eax
        ret

# narrowB2 (float -> __bf16), FENV_ACCESS OFF, broadwell target.
# The conversion is delegated to the __truncsfbf2 routine via the PLT.
narrowB2(float):
        # push/pop pair adjusts rsp by 8 around the call (presumably to keep
        # the stack 16-byte aligned at the call site)
        push    rax
        call    __truncsfbf2@PLT
        pop     rax
        ret

# widenF2 (_Float16 -> float), FENV_ACCESS OFF, broadwell target.
# Same code as widenF in this listing.
widenF2(_Float16):
        # extract word 0 of xmm0 and move it back as a zero-extended 32-bit value
        vpextrw eax, xmm0, 0
        movzx   eax, ax
        vmovd   xmm0, eax
        # half-precision -> single-precision conversion
        vcvtph2ps       xmm0, xmm0
        ret

# narrowF2 (float -> _Float16), FENV_ACCESS OFF, broadwell target.
# Same as narrowF but without the leading vxorps/vblendps pair.
narrowF2(float):
        # single -> half-precision conversion; imm8 = 4: rounding mode from MXCSR
        vcvtps2ph       xmm0, xmm0, 4
        # insert the 16-bit result into word 0 of xmm0
        vmovd   eax, xmm0
        vpinsrw xmm0, xmm0, eax, 0
        ret
@github-actions github-actions bot added the clang Clang issues not falling into any other category label Aug 15, 2024
@AaronBallman AaronBallman added the floating-point Floating-point math label Aug 19, 2024
@AaronBallman
Copy link
Collaborator

CC @andykaylor

@andykaylor
Copy link
Contributor

It looks like this is fixed in trunk, probably by this change: #80056

I don't know if there are any more 18.1 updates planned. We'll probably just need to wait for 19.1, which is expected very soon.

CC @shiltian @phoebewang

@AaronBallman
Copy link
Collaborator

Thanks @andykaylor!

@ngbronson-openai can you verify that trunk or one of the 19.x release candidates resolves the issue for you?

@phoebewang
Copy link
Contributor

It's correct on trunk https://godbolt.org/z/nqjf373qd

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang Clang issues not falling into any other category floating-point Floating-point math
Projects
None yet
Development

No branches or pull requests

4 participants