Skip to content

Improve IR for code which finds position of highest bit #43471

Closed
@davidbolvansky

Description

@davidbolvansky
Bugzilla Link 44126
Version trunk
OS Linux
CC @topperc,@hfinkel,@LebedevRI,@rotateright

Extended Description

https://github.com/facebook/zstd/blob/47034cd6c31125fdba3155abe9a618f580b4f3eb/programs/fileio.c#L1789

unsigned long long FIO_highbit64(unsigned long long v)
{
    unsigned  count = 0;
    v >>= 1;
    while (v) { v >>= 1; count++; }
    return count;
}

should be the same as (for nonzero `v`, since `__builtin_clzll(0)` is undefined):

unsigned long long FIO_highbit64a(unsigned long long v)
{

    return 63 - __builtin_clzll(v);
}

But the first version has worse IR and codegen:

define dso_local i64 @_Z13FIO_highbit64y(i64 %0) local_unnamed_addr #0 {
  %2 = lshr i64 %0, 1
  %3 = call i64 @llvm.ctlz.i64(i64 %2, i1 false), !range !2
  %4 = sub nuw nsw i64 64, %3
  ret i64 %4
}

=>

define dso_local i64 @_Z14FIO_highbit64ay(i64 %0) local_unnamed_addr #1 {
  %2 = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true), !range !3
  %3 = xor i64 %2, 63
  ret i64 %3
}

It would be good not to forget the trunc variant:

unsigned FIO_highbit64(unsigned long long v)
{
    unsigned count = 0;
    v >>= 1;
    while (v) { v >>= 1; count++; }
    return count;
}

define dso_local i32 @_Z13FIO_highbit64y(i64 %0) local_unnamed_addr #0 {
  %2 = lshr i64 %0, 1
  %3 = call i64 @llvm.ctlz.i64(i64 %2, i1 false), !range !2
  %4 = trunc i64 %3 to i32
  %5 = sub nsw i32 64, %4
  ret i32 %5
}

=>

define dso_local i32 @_Z13FIO_highbit64y(i64 %0) local_unnamed_addr #0 {
  %2 = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true), !range !2
  %3 = trunc i64 %2 to i32
  %4 = xor i32 %3, 63
  ret i32 %4
}

FIO_highbit64(unsigned long long):
        shr     rdi
        je      .LBB0_1
        bsr     rcx, rdi
        xor     rcx, 63
        mov     eax, 64
        sub     eax, ecx
        ret
.LBB0_1:
        mov     ecx, 64
        mov     eax, 64
        sub     eax, ecx
        ret

vs:

FIO_highbit64(unsigned long long):
        bsr     rax, rdi
        ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions