@@ -3356,10 +3356,9 @@ namespace skvm {
3356
3356
3357
3357
// Generally r(id),
3358
3358
// but with hints, try to alias dst() to r(v) for the first hinted v (hint1, then hint2) where dies_here(v).
3359
- auto dst = [&](Val hint = NA) -> Reg {
3360
- if (hint != NA) {
3361
- (void )try_alias (hint);
3362
- }
3359
+ auto dst = [&](Val hint1 = NA, Val hint2 = NA) -> Reg {
3360
+ if (hint1 != NA && try_alias (hint1)) { return r (id); }
3361
+ if (hint2 != NA && try_alias (hint2)) { return r (id); }
3363
3362
return r (id);
3364
3363
};
3365
3364
@@ -3746,13 +3745,13 @@ namespace skvm {
3746
3745
free_tmp (tmp);
3747
3746
} break ;
3748
3747
3749
- case Op::store8: a->xtns2h (dst (), r (x));
3748
+ case Op::store8: a->xtns2h (dst (x ), r (x));
3750
3749
a->xtnh2b (dst (), dst ());
3751
3750
if (scalar) { a->strb (dst (), arg[immy]); }
3752
3751
else { a->strs (dst (), arg[immy]); }
3753
3752
break ;
3754
3753
3755
- case Op::store16: a->xtns2h (dst (), r (x));
3754
+ case Op::store16: a->xtns2h (dst (x ), r (x));
3756
3755
if (scalar) { a->strh (dst (), arg[immy]); }
3757
3756
else { a->strd (dst (), arg[immy]); }
3758
3757
break ;
@@ -3840,7 +3839,7 @@ namespace skvm {
3840
3839
a->movs (GP1, r (x), i); // Extract index lane i into GP1.
3841
3840
a->add (GP1, GP0, GP1); // Add the gather base pointer.
3842
3841
a->ldrb (GP1, GP1); // Load that byte.
3843
- a->inss (dst (), GP1, i); // Insert it into dst() lane i.
3842
+ a->inss (dst (x ), GP1, i); // Insert it into dst() lane i.
3844
3843
}
3845
3844
} break ;
3846
3845
@@ -3852,7 +3851,7 @@ namespace skvm {
3852
3851
a->movs (GP1, r (x), i);
3853
3852
a->add (GP1, GP0, GP1, A::LSL, 1 ); // Scale index 2x into a byte offset.
3854
3853
a->ldrh (GP1, GP1); // 2-byte load.
3855
- a->inss (dst (), GP1, i);
3854
+ a->inss (dst (x ), GP1, i);
3856
3855
}
3857
3856
} break ;
3858
3857
@@ -3864,16 +3863,16 @@ namespace skvm {
3864
3863
a->movs (GP1, r (x), i);
3865
3864
a->add (GP1, GP0, GP1, A::LSL, 2 ); // Scale index 4x into a byte offset.
3866
3865
a->ldrs (GP1, GP1); // 4-byte load.
3867
- a->inss (dst (), GP1, i);
3866
+ a->inss (dst (x ), GP1, i);
3868
3867
}
3869
3868
} break ;
3870
3869
3871
- case Op::add_f32: a->fadd4s (dst (), r (x), r (y)); break ;
3872
- case Op::sub_f32: a->fsub4s (dst (), r (x), r (y)); break ;
3873
- case Op::mul_f32: a->fmul4s (dst (), r (x), r (y)); break ;
3874
- case Op::div_f32: a->fdiv4s (dst (), r (x), r (y)); break ;
3870
+ case Op::add_f32: a->fadd4s (dst (x,y ), r (x), r (y)); break ;
3871
+ case Op::sub_f32: a->fsub4s (dst (x,y ), r (x), r (y)); break ;
3872
+ case Op::mul_f32: a->fmul4s (dst (x,y ), r (x), r (y)); break ;
3873
+ case Op::div_f32: a->fdiv4s (dst (x,y ), r (x), r (y)); break ;
3875
3874
3876
- case Op::sqrt_f32: a->fsqrt4s (dst (), r (x)); break ;
3875
+ case Op::sqrt_f32: a->fsqrt4s (dst (x ), r (x)); break ;
3877
3876
3878
3877
case Op::fma_f32: // fmla.4s is z += x*y
3879
3878
if (try_alias (z)) { a->fmla4s ( r (z), r (x), r (y)); }
@@ -3894,21 +3893,21 @@ namespace skvm {
3894
3893
a->fneg4s (dst (), dst ());
3895
3894
break ;
3896
3895
3897
- case Op:: gt_f32: a->fcmgt4s (dst (), r (x), r (y)); break ;
3898
- case Op::gte_f32: a->fcmge4s (dst (), r (x), r (y)); break ;
3899
- case Op:: eq_f32: a->fcmeq4s (dst (), r (x), r (y)); break ;
3900
- case Op::neq_f32: a->fcmeq4s (dst (), r (x), r (y));
3901
- a->not16b (dst (), dst ()); break ;
3896
+ case Op:: gt_f32: a->fcmgt4s (dst (x,y ), r (x), r (y)); break ;
3897
+ case Op::gte_f32: a->fcmge4s (dst (x,y ), r (x), r (y)); break ;
3898
+ case Op:: eq_f32: a->fcmeq4s (dst (x,y ), r (x), r (y)); break ;
3899
+ case Op::neq_f32: a->fcmeq4s (dst (x,y ), r (x), r (y));
3900
+ a->not16b (dst (), dst ()); break ;
3902
3901
3903
3902
3904
- case Op::add_i32: a->add4s (dst (), r (x), r (y)); break ;
3905
- case Op::sub_i32: a->sub4s (dst (), r (x), r (y)); break ;
3906
- case Op::mul_i32: a->mul4s (dst (), r (x), r (y)); break ;
3903
+ case Op::add_i32: a->add4s (dst (x,y ), r (x), r (y)); break ;
3904
+ case Op::sub_i32: a->sub4s (dst (x,y ), r (x), r (y)); break ;
3905
+ case Op::mul_i32: a->mul4s (dst (x,y ), r (x), r (y)); break ;
3907
3906
3908
- case Op::bit_and : a->and16b (dst (), r (x), r (y)); break ;
3909
- case Op::bit_or : a->orr16b (dst (), r (x), r (y)); break ;
3910
- case Op::bit_xor : a->eor16b (dst (), r (x), r (y)); break ;
3911
- case Op::bit_clear: a->bic16b (dst (), r (x), r (y)); break ;
3907
+ case Op::bit_and : a->and16b (dst (x,y ), r (x), r (y)); break ;
3908
+ case Op::bit_or : a->orr16b (dst (x,y ), r (x), r (y)); break ;
3909
+ case Op::bit_xor : a->eor16b (dst (x,y ), r (x), r (y)); break ;
3910
+ case Op::bit_clear: a->bic16b (dst (x,y ), r (x), r (y)); break ;
3912
3911
3913
3912
case Op::select: // bsl16b is x = x ? y : z
3914
3913
if (try_alias (x)) { a->bsl16b ( r (x), r (y), r (z)); }
@@ -3928,18 +3927,18 @@ namespace skvm {
3928
3927
a->bsl16b (dst (), r (y), r (x));
3929
3928
break ;
3930
3929
3931
- case Op::shl_i32: a-> shl4s (dst (), r (x), immy); break ;
3932
- case Op::shr_i32: a->ushr4s (dst (), r (x), immy); break ;
3933
- case Op::sra_i32: a->sshr4s (dst (), r (x), immy); break ;
3930
+ case Op::shl_i32: a-> shl4s (dst (x ), r (x), immy); break ;
3931
+ case Op::shr_i32: a->ushr4s (dst (x ), r (x), immy); break ;
3932
+ case Op::sra_i32: a->sshr4s (dst (x ), r (x), immy); break ;
3934
3933
3935
- case Op::eq_i32: a->cmeq4s (dst (), r (x), r (y)); break ;
3936
- case Op::gt_i32: a->cmgt4s (dst (), r (x), r (y)); break ;
3934
+ case Op::eq_i32: a->cmeq4s (dst (x,y ), r (x), r (y)); break ;
3935
+ case Op::gt_i32: a->cmgt4s (dst (x,y ), r (x), r (y)); break ;
3937
3936
3938
- case Op::to_f32: a->scvtf4s (dst (), r (x)); break ;
3939
- case Op::trunc: a->fcvtzs4s (dst (), r (x)); break ;
3940
- case Op::round: a->fcvtns4s (dst (), r (x)); break ;
3941
- case Op::ceil: a->frintp4s (dst (), r (x)); break ;
3942
- case Op::floor: a->frintm4s (dst (), r (x)); break ;
3937
+ case Op::to_f32: a->scvtf4s (dst (x ), r (x)); break ;
3938
+ case Op::trunc: a->fcvtzs4s (dst (x ), r (x)); break ;
3939
+ case Op::round: a->fcvtns4s (dst (x ), r (x)); break ;
3940
+ case Op::ceil: a->frintp4s (dst (x ), r (x)); break ;
3941
+ case Op::floor: a->frintm4s (dst (x ), r (x)); break ;
3943
3942
3944
3943
case Op::to_fp16:
3945
3944
a->fcvtn (dst (x), r (x)); // 4x f32 -> 4x f16 in bottom four lanes
0 commit comments