-
Notifications
You must be signed in to change notification settings - Fork 14.9k
Open
Labels
Description
Bugzilla Link | 35982 |
Version | trunk |
OS | Linux |
Blocks | #41664 |
CC | @topperc,@efriedma-quic,@gnzlbg,@jyknight,@RKSimon,@zygoloid,@tstellar |
Extended Description
The post-RA scheduler (PostRAScheduler) reorders emms relative to MMX instructions, so we get a wrong result:
================= main.c ==============
#include <stdio.h>
#include <x86intrin.h>
float sum(__m64);
/* Reproducer driver: builds an __m64 with _mm_set_pi32(5, 3), passes it to
 * sum() (defined in nice.c), and prints the returned float. */
int main()
{
float result;
__m64 x = (_mm_set_pi32(5, 3));
result = sum(x);
printf("5 + 3 = %f\n", result);
/* Clear the MMX state (emms) before returning. */
_mm_empty();
return 0;
}
=======================================
================= nice.c ==============
#include <x86intrin.h>
/* Adds the two 32-bit integer lanes of x and returns the sum as a float.
 *
 * The position of _mm_empty() -- after the last MMX intrinsic and before the
 * floating-point arithmetic -- is the point of this reproducer: the report is
 * about the compiler not preserving that order in the generated code. */
float sum(__m64 x)
{
double t;
int x1, x2;
/* Low 32-bit lane of x. */
x1 = _mm_cvtsi64_si32(x);
/* Duplicate the high lane into both lanes, then read the low lane,
 * i.e. the original high 32 bits of x. */
x2 = _mm_cvtsi64_si32(_mm_unpackhi_pi32(x, x));
/* emms: must execute after every MMX operation above. */
_mm_empty();
t = (float)x1 + (float)x2;
/* t is a double; the return converts it to float. */
return t;
}
=======================================
>>> clang -v
clang version 7.0.0 (trunk 322555)
Target: x86_64-unknown-linux-gnu
Thread model: posix
...
>>> clang -m32 -O0 -o nice.exe main.c nice.c
>>> ./nice.exe
5 + 3 = 8.000000
>>> clang -m32 -O2 -o nice.exe main.c nice.c
>>> ./nice.exe
5 + 3 = -nan
>>> clang -m32 -O2 -o nice.exe main.c nice.c -mllvm -opt-bisect-limit=194 && ./nice.exe
...
BISECT: running pass (193) Tail Duplication on function (sum)
BISECT: running pass (194) Machine Copy Propagation Pass on function (sum)
BISECT: NOT running pass (195) Post RA top-down list latency scheduler on function (sum)
BISECT: NOT running pass (196) Branch Probability Basic Block Placement on function (sum)
...
5 + 3 = 8
>>> clang -m32 -O2 -o nice.exe main.c nice.c -mllvm -opt-bisect-limit=195 && ./nice.exe
...
BISECT: running pass (193) Tail Duplication on function (sum)
BISECT: running pass (194) Machine Copy Propagation Pass on function (sum)
BISECT: running pass (195) Post RA top-down list latency scheduler on function (sum)
BISECT: NOT running pass (196) Branch Probability Basic Block Placement on function (sum)
BISECT: NOT running pass (197) X86 Execution Dependency Fix on function (sum)
...
5 + 3 = -nan
Let's look at the generated assembly before and after the post-RA scheduler (PostRAScheduler) runs:
=============== nice-194.s ============
cvtsi2ssl %eax, %xmm0
movq 8(%esp), %mm0
emms
punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
movd %mm0, %ecx
cvtsi2ssl %ecx, %xmm1
=======================================
The post-RA scheduler (PostRAScheduler) changed the order of operations to:
=============== nice-195.s ============
movq 8(%esp), %mm0
punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
movd %mm0, %ecx
emms
cvtsi2ssl %eax, %xmm0
cvtsi2ssl %ecx, %xmm1
=======================================
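So now emms is placed before an MMX operation, and as a result we get the wrong answer. (Any MMX instruction executed after emms marks the x87 register stack as in use again; with -m32 the float result is returned on the x87 stack, so the subsequent x87 load overflows the stack and produces NaN.)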