Skip to content

Multiply by a power of 2 and ctz+shift should often be interchangeable #84763

Closed
@Validark

Description

@Validark

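The compiler should probably be able to optimize either of these functions into the other one, depending on which is cheaper on the particular hardware (Godbolt link). They are equivalent because `y & (~y +% 1)` isolates the lowest set bit of `y`, which is `1 << @ctz(y)` when `y` is non-zero and 0 when `y` is 0: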

/// Multiplies `x` by the lowest set bit of `y`, wrapping on overflow.
/// When `y` is 0 there is no set bit, the mask is 0, and the result is 0.
export fn foo(x: u64, y: u64) u64 {
    // `~y +% 1` is the two's-complement negation of `y`; AND-ing it with `y`
    // keeps only the least significant set bit.
    const lowest_set_bit: u64 = y & (~y +% 1);
    return x *% lowest_set_bit;
}

/// Shifts `x` left by the number of trailing zero bits in `y`.
/// Returns 0 when `y` is 0.
export fn bar(x: u64, y: u64) u64 {
    if (y != 0) {
        // With `y` non-zero the trailing-zero count is at most 63, so it fits
        // the 6-bit shift amount required for a u64.
        const shift: u6 = @intCast(@ctz(y));
        return x << shift;
    }
    return 0;
}

x86 znver4 emits:

# foo: two instructions plus return.
# NOTE(review): assumes x arrives in rdi and y in rsi (System V AMD64 argument order) - confirm for the target ABI.
foo:
        # rax = rsi & -rsi: BLSI extracts the lowest set bit of rsi (0 when rsi is 0).
        blsi    rax, rsi
        # rax = rax * rdi, keeping the low 64 bits.
        imul    rax, rdi
        ret

# bar: branch-free; the y == 0 case is handled with a conditional move.
# NOTE(review): assumes x arrives in rdi and y in rsi (System V AMD64 argument order) - confirm for the target ABI.
bar:
        # rax = number of trailing zero bits in rsi; TZCNT sets CF when its source is 0.
        tzcnt   rax, rsi
        # rax = rdi << rax; SHLX does not modify flags, so CF from TZCNT is still live.
        shlx    rax, rdi, rax
        # If CF is set (rsi was 0), overwrite the result with rsi, i.e. 0.
        cmovb   rax, rsi
        ret

RISC-V sifive_u74 emits:

# foo: three ALU instructions plus return.
# NOTE(review): assumes x arrives in a0 and y in a1 (standard RISC-V integer argument order) - confirm for the target ABI.
foo:
        # a2 = -a1
        neg     a2, a1
        # a1 = a1 & -a1: the lowest set bit of a1 (0 when a1 is 0).
        and     a1, a1, a2
        # a0 = a1 * a0, keeping the low 64 bits.
        mul     a0, a1, a0
        ret

# Constant pool for bar. No count-trailing-zeros instruction appears in this listing;
# the count is computed with a multiply and a byte-table lookup instead
# (this looks like the classic de Bruijn-sequence technique - confirm).
# .LCPI1_0: 64-bit multiplier applied to the isolated lowest set bit.
.LCPI1_0:
        .quad   151050438420815295
# .LCPI1_1: byte table indexed by the top 6 bits of the product; each entry is used as the shift amount.
.LCPI1_1:
        .ascii  "\000\001\002\007\003\r\b\023\004\031\016\034\t\"\024(\005\021\032&\017.\0350\n\037#6\0252)9?\006\f\022\030\033!'\020%-/\036518>\013\027 $,47=\026+3<*;:"
# NOTE(review): assumes x arrives in a0 and y in a1 (standard RISC-V integer argument order) - confirm for the target ABI.
bar:
        # a1 == 0: jump to the path that returns 0.
        beqz    a1, .LBB1_2
        # Start forming the address of the multiplier constant.
        lui     a2, %hi(.LCPI1_0)
        # a1 = a1 & -a1: isolate the lowest set bit.
        neg     a3, a1
        and     a1, a1, a3
        # a2 = the 64-bit multiplier loaded from .LCPI1_0.
        ld      a2, %lo(.LCPI1_0)(a2)
        # Multiply the isolated bit by the constant (low 64 bits kept).
        mul     a1, a1, a2
        lui     a2, %hi(.LCPI1_1)
        # Keep only the top 6 bits of the product as a table index (0..63).
        srli    a1, a1, 58
        # a2 = address of the lookup table; a1 = address of the selected entry.
        addi    a2, a2, %lo(.LCPI1_1)
        add     a1, a1, a2
        # a1 = table byte, used below as the shift amount.
        lbu     a1, 0(a1)
        # a0 = a0 << a1
        sll     a0, a0, a1
        ret
# Zero-input path: return 0.
.LBB1_2:
        li      a0, 0
        ret

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions