__bf16 treated like _Float16 under #pragma STDC FENV_ACCESS ON (x86_64 clang 18.1.0)

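Under `#pragma STDC FENV_ACCESS ON` (or `-ffp-exception-behavior=strict`), `__bf16` <-> `float` conversions are compiled using the IEEE half-precision conversion instructions (`vcvtph2ps` / `vcvtps2ph`) instead of bfloat16 conversions. Because the two 16-bit formats have different exponent and mantissa widths, this yields incorrect values.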

My test code:

```
#pragma STDC FENV_ACCESS ON

float widenB(__bf16 x) { return x; }
 __bf16 narrowB(float x) { return x; }

float widenF(_Float16 x) { return x; }
_Float16 narrowF(float x) { return x; }

#pragma STDC FENV_ACCESS OFF

float widenB2(__bf16 x) { return x; }
__bf16 narrowB2(float x) { return x; }
float widenF2(_Float16 x) { return x; }
_Float16 narrowF2(float x) { return x; }
```

Compiled on godbolt with `x86-64 clang 18.1.0` and `-march=sapphirerapids -std=gnu++2b -O3`, this produces:

```
_Z6widenBDF16b:
        vmovw   eax, xmm0
        movzx   eax, ax
        vmovd   xmm0, eax
        vcvtph2ps       xmm0, xmm0
        ret

narrowB(float):
        vxorps  xmm1, xmm1, xmm1
        vblendps        xmm0, xmm1, xmm0, 1
        vcvtps2ph       xmm0, xmm0, 4
        vmovw   eax, xmm0
        vmovw   xmm0, eax
        ret

widenF(_Float16):
        vcvtsh2ss       xmm0, xmm0, xmm0
        ret

narrowF(float):
        vcvtss2sh       xmm0, xmm0, xmm0
        ret

_Z7widenB2DF16b:
        vmovw   eax, xmm0
        shl     eax, 16
        vmovd   xmm0, eax
        ret

narrowB2(float):
        vcvtneps2bf16   xmm0, xmm0
        vmovw   eax, xmm0
        vmovw   xmm0, eax
        ret

widenF2(_Float16):
        vcvtsh2ss       xmm0, xmm0, xmm0
        ret

narrowF2(float):
        vcvtss2sh       xmm0, xmm0, xmm0
        ret
```

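Note that the BF16-specific conversions (a 16-bit left shift to widen, `vcvtneps2bf16` to narrow) are emitted only for the versions compiled without FENV_ACCESS; with FENV_ACCESS ON, the `__bf16` functions use the half-precision instructions `vcvtph2ps` / `vcvtps2ph`.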

With broadwell as the target, the same issue occurs (with FENV_ACCESS ON, `__bf16` conversions are compiled as if the type were `_Float16`):

```
_Z6widenBDF16b:
        vpextrw eax, xmm0, 0
        movzx   eax, ax
        vmovd   xmm0, eax
        vcvtph2ps       xmm0, xmm0
        ret

narrowB(float):
        vxorps  xmm1, xmm1, xmm1
        vblendps        xmm0, xmm1, xmm0, 1
        vcvtps2ph       xmm0, xmm0, 4
        vmovd   eax, xmm0
        vpinsrw xmm0, xmm0, eax, 0
        ret

widenF(_Float16):
        vpextrw eax, xmm0, 0
        movzx   eax, ax
        vmovd   xmm0, eax
        vcvtph2ps       xmm0, xmm0
        ret

narrowF(float):
        vxorps  xmm1, xmm1, xmm1
        vblendps        xmm0, xmm1, xmm0, 1
        vcvtps2ph       xmm0, xmm0, 4
        vmovd   eax, xmm0
        vpinsrw xmm0, xmm0, eax, 0
        ret

_Z7widenB2DF16b:
        vpextrw eax, xmm0, 0
        shl     eax, 16
        vmovd   xmm0, eax
        ret

narrowB2(float):
        push    rax
        call    __truncsfbf2@PLT
        pop     rax
        ret

widenF2(_Float16):
        vpextrw eax, xmm0, 0
        movzx   eax, ax
        vmovd   xmm0, eax
        vcvtph2ps       xmm0, xmm0
        ret

narrowF2(float):
        vcvtps2ph       xmm0, xmm0, 4
        vmovd   eax, xmm0
        vpinsrw xmm0, xmm0, eax, 0
        ret
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

__bf16 treated like _Float16 under #pragma STDC FENV_ACCESS ON (x86_64 clang 18.1.0) #104465

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

__bf16 treated like _Float16 under #pragma STDC FENV_ACCESS ON (x86_64 clang 18.1.0) #104465

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions