@@ -2208,7 +2208,9 @@ namespace skvm {
2208
2208
// Four-lane f32 divide. The aarch64 JIT lowers Op::div_f32 as
// fdiv4s(dst, x, y), so presumably d = n / m in each lane.
void Assembler::fdiv4s(V d, V n, V m) {
    constexpr auto kOpcodeHi = 0b0'1'1'01110'0'0'1;  // bits passed ahead of m
    constexpr auto kOpcodeLo = 0b11111'1;            // bits passed between m and n
    this->op(kOpcodeHi, m, kOpcodeLo, n, d);
}
2209
2209
// Hands the fmin.4s opcode fields and the three vector registers to op().
// NOTE(review): the name suggests a per-lane f32 minimum of n and m written to d
// (AArch64 FMIN Vd.4S) -- confirm against the Arm ARM.
void Assembler::fmin4s(V d, V n, V m) {
    constexpr auto kOpcodeHi = 0b0'1'0'01110'1'0'1;  // bits passed ahead of m
    constexpr auto kOpcodeLo = 0b11110'1;            // bits passed between m and n
    this->op(kOpcodeHi, m, kOpcodeLo, n, d);
}
2210
2210
// Hands the fmax.4s opcode fields and the three vector registers to op().
// NOTE(review): the name suggests a per-lane f32 maximum of n and m written to d
// (AArch64 FMAX Vd.4S) -- confirm against the Arm ARM.
void Assembler::fmax4s(V d, V n, V m) {
    constexpr auto kOpcodeHi = 0b0'1'0'01110'0'0'1;  // bits passed ahead of m
    constexpr auto kOpcodeLo = 0b11110'1;            // bits passed between m and n
    this->op(kOpcodeHi, m, kOpcodeLo, n, d);
}
2211
- void Assembler::fneg4s (V d, V n) { this ->op (0b0'1'1'01110'1'0'10000'01111'10 , n, d); }
2211
+
2212
+ void Assembler::fneg4s (V d, V n) { this ->op (0b0'1'1'01110'1'0'10000'01111'10 , n,d); }  // Passes the 22-bit opcode plus n, d to op(). NOTE(review): name suggests per-lane f32 negate (FNEG Vd.4S) -- confirm; no caller visible here.
2213
+ void Assembler::fsqrt4s (V d, V n) { this ->op (0b0'1'1'01110'1'0'10000'11111'10 , n,d); }  // Passes the 22-bit opcode plus n, d to op(); the aarch64 JIT uses this for Op::sqrt_f32 as fsqrt4s(dst, x).
2212
2214
2213
2215
// Hands the fcmeq.4s opcode fields and the three vector registers to op().
// NOTE(review): the name suggests a per-lane f32 equality compare of n and m with
// the mask written to d (AArch64 FCMEQ Vd.4S) -- confirm against the Arm ARM.
void Assembler::fcmeq4s(V d, V n, V m) {
    constexpr auto kOpcodeHi = 0b0'1'0'01110'0'0'1;  // bits passed ahead of m
    constexpr auto kOpcodeLo = 0b1110'0'1;           // bits passed between m and n
    this->op(kOpcodeHi, m, kOpcodeLo, n, d);
}
2214
2216
// Hands the fcmgt.4s opcode fields and the three vector registers to op().
// NOTE(review): the name suggests a per-lane f32 "n > m" compare with the mask
// written to d (AArch64 FCMGT Vd.4S) -- confirm against the Arm ARM.
void Assembler::fcmgt4s(V d, V n, V m) {
    constexpr auto kOpcodeHi = 0b0'1'1'01110'1'0'1;  // bits passed ahead of m
    constexpr auto kOpcodeLo = 0b1110'0'1;           // bits passed between m and n
    this->op(kOpcodeHi, m, kOpcodeLo, n, d);
}
@@ -2238,6 +2240,8 @@ namespace skvm {
2238
2240
// Two-register vector op: forwards a 22-bit opcode with source n and destination d.
// The aarch64 JIT uses it for Op::to_f32 as scvtf4s(dst, x).
void Assembler::scvtf4s(V d, V n) {
    constexpr auto kOpcode = 0b0'1'0'01110'0'0'10000'11101'10;
    this->op(kOpcode, n, d);
}
2239
2241
// Two-register vector op: forwards a 22-bit opcode with source n and destination d.
// The aarch64 JIT uses it for Op::trunc as fcvtzs4s(dst, x).
void Assembler::fcvtzs4s(V d, V n) {
    constexpr auto kOpcode = 0b0'1'0'01110'1'0'10000'1101'1'10;
    this->op(kOpcode, n, d);
}
2240
2242
// Two-register vector op: forwards a 22-bit opcode with source n and destination d.
// The aarch64 JIT uses it for Op::round as fcvtns4s(dst, x).
// NOTE(review): FCVTNS is expected to round to nearest with ties to even --
// confirm that this matches what Op::round promises.
void Assembler::fcvtns4s(V d, V n) {
    constexpr auto kOpcode = 0b0'1'0'01110'0'0'10000'1101'0'10;
    this->op(kOpcode, n, d);
}
2243
+ void Assembler::frintp4s (V d, V n) { this ->op (0b0'1'0'01110'1'0'10000'1100'0'10 , n,d); }  // Passes the 22-bit opcode plus n, d to op(); the aarch64 JIT uses this for Op::ceil as frintp4s(dst, x).
2244
+ void Assembler::frintm4s (V d, V n) { this ->op (0b0'1'0'01110'0'0'10000'1100'1'10 , n,d); }  // Passes the 22-bit opcode plus n, d to op(); the aarch64 JIT uses this for Op::floor as frintm4s(dst, x).
2241
2245
2242
2246
// Two-register vector op: forwards a 22-bit opcode with source n and destination d.
// The aarch64 Op::store8 lowering calls this first, as xtns2h(dst, x).
// NOTE(review): the name suggests narrowing 32-bit lanes to 16-bit (XTN) -- confirm.
void Assembler::xtns2h(V d, V n) {
    constexpr auto kOpcode = 0b0'0'0'01110'01'10000'10010'10;
    this->op(kOpcode, n, d);
}
2243
2247
// Two-register vector op: forwards a 22-bit opcode with source n and destination d.
// The aarch64 Op::store8 lowering calls this second, as xtnh2b(dst, dst).
// NOTE(review): the name suggests narrowing 16-bit lanes to 8-bit (XTN) -- confirm.
void Assembler::xtnh2b(V d, V n) {
    constexpr auto kOpcode = 0b0'0'0'01110'00'10000'10010'10;
    this->op(kOpcode, n, d);
}
@@ -2346,6 +2350,10 @@ namespace skvm {
2346
2350
this ->op (0b10'011'1'00'00000000000000 , (V)0 , dst, (imm19 & 19_mask) << 5 );
2347
2351
}
2348
2352
2353
+ void Assembler::dup4s (V dst, X src) {  // Used by the aarch64 Op::index lowering as dup4s(dst, N). NOTE(review): presumably broadcasts general register src into all four 32-bit lanes of dst (DUP Vd.4S, Wn) -- confirm.
2354
+ this ->op (0b0'1'0'01110000'00100'0'0001'1 , src, dst);  // 22-bit opcode constant, then source X register, then destination V register.
2355
+ }
2356
+
2349
2357
// Forwards a 22-bit opcode to op() with general register src and vector register dst.
// NOTE(review): the name suggests AArch64 LD1R {Vt.4S}, [Xn], i.e. load one
// 32-bit value from the address in src and replicate it to every lane of dst --
// confirm against the Arm ARM.
void Assembler::ld1r4s(V dst, X src) {
    constexpr auto kOpcode = 0b0'1'0011010'1'0'00000'110'0'10;
    this->op(kOpcode, src, dst);
}
@@ -3260,12 +3268,10 @@ namespace skvm {
3260
3268
return r;
3261
3269
};
3262
3270
3263
- #if defined(__x86_64__) || defined(_M_X64) // Nothing special... just unused on ARM.
3264
3271
// Releases a temporary register: r must currently be marked TMP in regs
// (checked by SkASSERT), after which its slot is reset to NA.
// The aarch64 Op::index lowering calls this on the register it got from alloc_tmp().
auto free_tmp = [&](Reg r) {
3265
3272
SkASSERT (regs[r] == TMP);
3266
3273
regs[r] = NA;
3267
3274
};
3268
- #endif
3269
3275
3270
3276
// Which register holds dst,x,y,z for this instruction? NA if none does yet.
3271
3277
int rd = NA,
@@ -3710,11 +3716,13 @@ namespace skvm {
3710
3716
break ;
3711
3717
3712
3718
#elif defined(__aarch64__)
3713
- default : // TODO
3714
- if (false ) {
3715
- SkDEBUGFAILF (" \n Op::%s (%d) not yet implemented\n " , name (op), op);
3716
- }
3717
- return false ;
3719
+ case Op::store64:
3720
+ case Op::store128:
3721
+ case Op::load64:
3722
+ case Op::load128:
3723
+ case Op::to_half:
3724
+ case Op::from_half:
3725
+ return false ; // TODO
3718
3726
3719
3727
case Op::assert_true: {
3720
3728
a->uminv4s (dst (), r (x)); // uminv acts like an all() across the vector.
@@ -3725,6 +3733,14 @@ namespace skvm {
3725
3733
a->label (&all_true);
3726
3734
} break ;
3727
3735
3736
+ case Op::index: {
3737
+ A::V tmp = alloc_tmp ();
3738
+ a->ldrq (tmp, &iota);
3739
+ a->dup4s (dst (), N);
3740
+ a->sub4s (dst (), dst (), tmp);
3741
+ free_tmp (tmp);
3742
+ } break ;
3743
+
3728
3744
case Op::store8: a->xtns2h (dst (), r (x));
3729
3745
a->xtnh2b (dst (), dst ());
3730
3746
if (scalar) { a->strb (dst (), arg[immy]); }
@@ -3801,6 +3817,8 @@ namespace skvm {
3801
3817
case Op::mul_f32: a->fmul4s (dst (), r (x), r (y)); break ;
3802
3818
case Op::div_f32: a->fdiv4s (dst (), r (x), r (y)); break ;
3803
3819
3820
+ case Op::sqrt_f32: a->fsqrt4s (dst (), r (x)); break ;
3821
+
3804
3822
case Op::fma_f32: // fmla.4s is z += x*y
3805
3823
if (try_alias (z)) { a->fmla4s ( r (z), r (x), r (y)); }
3806
3824
else { a->orr16b (dst (), r (z), r (z));
@@ -3864,8 +3882,8 @@ namespace skvm {
3864
3882
case Op::to_f32: a->scvtf4s (dst (), r (x)); break ;
3865
3883
case Op::trunc: a->fcvtzs4s (dst (), r (x)); break ;
3866
3884
case Op::round: a->fcvtns4s (dst (), r (x)); break ;
3867
- // TODO: fcvtns.4s rounds to nearest even.
3868
- // I think we actually want frintx -> fcvtzs to round to current mode.
3885
+ case Op::ceil: a-> frintp4s ( dst (), r (x)); break ;
3886
+ case Op::floor: a-> frintm4s ( dst (), r (x)); break ;
3869
3887
#endif
3870
3888
}
3871
3889
0 commit comments