Skip to content

Commit 0669fa8

Browse files
heiher authored and Amanieu committed
core_arch: Add LoongArch frecipe intrinsics
1 parent 1e6ee7c commit 0669fa8

File tree

4 files changed: +184 -12 lines changed

crates/core_arch/src/loongarch64/lasx/generated.rs

+42 -6
Original file line number | Diff line number | Diff line change
@@ -668,6 +668,14 @@ extern "unadjusted" {
668668
fn __lasx_xvfrecip_s(a: v8f32) -> v8f32;
669669
#[link_name = "llvm.loongarch.lasx.xvfrecip.d"]
670670
fn __lasx_xvfrecip_d(a: v4f64) -> v4f64;
671+
#[link_name = "llvm.loongarch.lasx.xvfrecipe.s"]
672+
fn __lasx_xvfrecipe_s(a: v8f32) -> v8f32;
673+
#[link_name = "llvm.loongarch.lasx.xvfrecipe.d"]
674+
fn __lasx_xvfrecipe_d(a: v4f64) -> v4f64;
675+
#[link_name = "llvm.loongarch.lasx.xvfrsqrte.s"]
676+
fn __lasx_xvfrsqrte_s(a: v8f32) -> v8f32;
677+
#[link_name = "llvm.loongarch.lasx.xvfrsqrte.d"]
678+
fn __lasx_xvfrsqrte_d(a: v4f64) -> v4f64;
671679
#[link_name = "llvm.loongarch.lasx.xvfrint.s"]
672680
fn __lasx_xvfrint_s(a: v8f32) -> v8f32;
673681
#[link_name = "llvm.loongarch.lasx.xvfrint.d"]
@@ -941,15 +949,15 @@ extern "unadjusted" {
941949
#[link_name = "llvm.loongarch.lasx.xvld"]
942950
fn __lasx_xvld(a: *const i8, b: i32) -> v32i8;
943951
#[link_name = "llvm.loongarch.lasx.xvst"]
944-
fn __lasx_xvst(a: v32i8, b: *mut i8, c: i32) ;
952+
fn __lasx_xvst(a: v32i8, b: *mut i8, c: i32);
945953
#[link_name = "llvm.loongarch.lasx.xvstelm.b"]
946-
fn __lasx_xvstelm_b(a: v32i8, b: *mut i8, c: i32, d: u32) ;
954+
fn __lasx_xvstelm_b(a: v32i8, b: *mut i8, c: i32, d: u32);
947955
#[link_name = "llvm.loongarch.lasx.xvstelm.h"]
948-
fn __lasx_xvstelm_h(a: v16i16, b: *mut i8, c: i32, d: u32) ;
956+
fn __lasx_xvstelm_h(a: v16i16, b: *mut i8, c: i32, d: u32);
949957
#[link_name = "llvm.loongarch.lasx.xvstelm.w"]
950-
fn __lasx_xvstelm_w(a: v8i32, b: *mut i8, c: i32, d: u32) ;
958+
fn __lasx_xvstelm_w(a: v8i32, b: *mut i8, c: i32, d: u32);
951959
#[link_name = "llvm.loongarch.lasx.xvstelm.d"]
952-
fn __lasx_xvstelm_d(a: v4i64, b: *mut i8, c: i32, d: u32) ;
960+
fn __lasx_xvstelm_d(a: v4i64, b: *mut i8, c: i32, d: u32);
953961
#[link_name = "llvm.loongarch.lasx.xvinsve0.w"]
954962
fn __lasx_xvinsve0_w(a: v8i32, b: v8i32, c: u32) -> v8i32;
955963
#[link_name = "llvm.loongarch.lasx.xvinsve0.d"]
@@ -977,7 +985,7 @@ extern "unadjusted" {
977985
#[link_name = "llvm.loongarch.lasx.xvldx"]
978986
fn __lasx_xvldx(a: *const i8, b: i64) -> v32i8;
979987
#[link_name = "llvm.loongarch.lasx.xvstx"]
980-
fn __lasx_xvstx(a: v32i8, b: *mut i8, c: i64) ;
988+
fn __lasx_xvstx(a: v32i8, b: *mut i8, c: i64);
981989
#[link_name = "llvm.loongarch.lasx.xvextl.qu.du"]
982990
fn __lasx_xvextl_qu_du(a: v4u64) -> v4u64;
983991
#[link_name = "llvm.loongarch.lasx.xvinsgr2vr.w"]
@@ -3979,6 +3987,34 @@ pub unsafe fn lasx_xvfrecip_d(a: v4f64) -> v4f64 {
39793987
__lasx_xvfrecip_d(a)
39803988
}
39813989

3990+
/// Thin wrapper that forwards `a` to the `__lasx_xvfrecipe_s` binding
/// (linked as `llvm.loongarch.lasx.xvfrecipe.s`) and returns its result.
///
/// Going by the `frecipe` name, this is presumably a per-lane single-precision
/// reciprocal *estimate*; the accuracy contract is not visible here, so confirm
/// it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lasx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
3991+
#[target_feature(enable = "lasx,frecipe")]
3992+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
3993+
pub unsafe fn lasx_xvfrecipe_s(a: v8f32) -> v8f32 {
3994+
__lasx_xvfrecipe_s(a)
3995+
}
3996+
3997+
/// Thin wrapper that forwards `a` to the `__lasx_xvfrecipe_d` binding
/// (linked as `llvm.loongarch.lasx.xvfrecipe.d`) and returns its result.
///
/// Going by the `frecipe` name, this is presumably a per-lane double-precision
/// reciprocal *estimate*; the accuracy contract is not visible here, so confirm
/// it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lasx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
3998+
#[target_feature(enable = "lasx,frecipe")]
3999+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
4000+
pub unsafe fn lasx_xvfrecipe_d(a: v4f64) -> v4f64 {
4001+
__lasx_xvfrecipe_d(a)
4002+
}
4003+
4004+
/// Thin wrapper that forwards `a` to the `__lasx_xvfrsqrte_s` binding
/// (linked as `llvm.loongarch.lasx.xvfrsqrte.s`) and returns its result.
///
/// Going by the `frsqrte` name, this is presumably a per-lane single-precision
/// reciprocal-square-root *estimate*; the accuracy contract is not visible
/// here, so confirm it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lasx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
4005+
#[target_feature(enable = "lasx,frecipe")]
4006+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
4007+
pub unsafe fn lasx_xvfrsqrte_s(a: v8f32) -> v8f32 {
4008+
__lasx_xvfrsqrte_s(a)
4009+
}
4010+
4011+
/// Thin wrapper that forwards `a` to the `__lasx_xvfrsqrte_d` binding
/// (linked as `llvm.loongarch.lasx.xvfrsqrte.d`) and returns its result.
///
/// Going by the `frsqrte` name, this is presumably a per-lane double-precision
/// reciprocal-square-root *estimate*; the accuracy contract is not visible
/// here, so confirm it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lasx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
4012+
#[target_feature(enable = "lasx,frecipe")]
4013+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
4014+
pub unsafe fn lasx_xvfrsqrte_d(a: v4f64) -> v4f64 {
4015+
__lasx_xvfrsqrte_d(a)
4016+
}
4017+
39824018
#[inline]
39834019
#[target_feature(enable = "lasx")]
39844020
#[unstable(feature = "stdarch_loongarch", issue = "117427")]

crates/core_arch/src/loongarch64/lasx/tests.rs

+68
Original file line number | Diff line number | Diff line change
@@ -6558,6 +6558,74 @@ unsafe fn test_lasx_xvfrecip_d() {
65586558
assert_eq!(r, transmute(lasx_xvfrecip_d(transmute(a))));
65596559
}
65606560

6561+
// Bit-exact check of `lasx_xvfrecipe_s`: the eight input lanes are given as raw
// u32 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x4` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lasx,frecipe")]
6562+
unsafe fn test_lasx_xvfrecipe_s() {
6563+
let a = u32x8::new(
6564+
1061538089, 1009467584, 1043164316, 1030910448, 1059062619, 1048927856, 1064915194,
6565+
1028524176,
6566+
);
6567+
let r = i64x4::new(
6568+
4809660548434472067,
6569+
4721787188318892829,
6570+
4644815739361740708,
6571+
4728509413412007938,
6572+
);
6573+
6574+
assert_eq!(r, transmute(lasx_xvfrecipe_s(transmute(a))));
6575+
}
6576+
6577+
// Bit-exact check of `lasx_xvfrecipe_d`: the four input lanes are given as raw
// u64 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x4` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lasx,frecipe")]
6578+
unsafe fn test_lasx_xvfrecipe_d() {
6579+
let a = u64x4::new(
6580+
4599514006383746620,
6581+
4607114589130093485,
6582+
4603063439897885463,
6583+
4602774413388259784,
6584+
);
6585+
let r = i64x4::new(
6586+
4614125529786744832,
6587+
4607216711966392320,
6588+
4610977572161847296,
6589+
4611499011256352768,
6590+
);
6591+
6592+
assert_eq!(r, transmute(lasx_xvfrecipe_d(transmute(a))));
6593+
}
6594+
6595+
// Bit-exact check of `lasx_xvfrsqrte_s`: the eight input lanes are given as raw
// u32 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x4` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lasx,frecipe")]
6596+
unsafe fn test_lasx_xvfrsqrte_s() {
6597+
let a = u32x8::new(
6598+
1042369896, 1033402040, 1063640659, 1061099374, 1064617699, 1050687308, 1049602990,
6599+
1047907124,
6600+
);
6601+
let r = i64x4::new(
6602+
4641680627989561881,
6603+
4581330281566770462,
6604+
4604034110053345047,
6605+
4612427253546066334,
6606+
);
6607+
6608+
assert_eq!(r, transmute(lasx_xvfrsqrte_s(transmute(a))));
6609+
}
6610+
6611+
// Bit-exact check of `lasx_xvfrsqrte_d`: the four input lanes are given as raw
// u64 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x4` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lasx,frecipe")]
6612+
unsafe fn test_lasx_xvfrsqrte_d() {
6613+
let a = u64x4::new(
6614+
4601640737224225970,
6615+
4602882853441572005,
6616+
4594899837086694432,
6617+
4596019513190087348,
6618+
);
6619+
let r = i64x4::new(
6620+
4609450077243572224,
6621+
4608908592999825408,
6622+
4612828109287194624,
6623+
4612346183891812352,
6624+
);
6625+
6626+
assert_eq!(r, transmute(lasx_xvfrsqrte_d(transmute(a))));
6627+
}
6628+
65616629
#[simd_test(enable = "lasx")]
65626630
unsafe fn test_lasx_xvfrint_s() {
65636631
let a = u32x8::new(

crates/core_arch/src/loongarch64/lsx/generated.rs

+42 -6
Original file line number | Diff line number | Diff line change
@@ -698,6 +698,14 @@ extern "unadjusted" {
698698
fn __lsx_vfrecip_s(a: v4f32) -> v4f32;
699699
#[link_name = "llvm.loongarch.lsx.vfrecip.d"]
700700
fn __lsx_vfrecip_d(a: v2f64) -> v2f64;
701+
#[link_name = "llvm.loongarch.lsx.vfrecipe.s"]
702+
fn __lsx_vfrecipe_s(a: v4f32) -> v4f32;
703+
#[link_name = "llvm.loongarch.lsx.vfrecipe.d"]
704+
fn __lsx_vfrecipe_d(a: v2f64) -> v2f64;
705+
#[link_name = "llvm.loongarch.lsx.vfrsqrte.s"]
706+
fn __lsx_vfrsqrte_s(a: v4f32) -> v4f32;
707+
#[link_name = "llvm.loongarch.lsx.vfrsqrte.d"]
708+
fn __lsx_vfrsqrte_d(a: v2f64) -> v2f64;
701709
#[link_name = "llvm.loongarch.lsx.vfrint.s"]
702710
fn __lsx_vfrint_s(a: v4f32) -> v4f32;
703711
#[link_name = "llvm.loongarch.lsx.vfrint.d"]
@@ -959,13 +967,13 @@ extern "unadjusted" {
959967
#[link_name = "llvm.loongarch.lsx.vfrintrm.d"]
960968
fn __lsx_vfrintrm_d(a: v2f64) -> v2f64;
961969
#[link_name = "llvm.loongarch.lsx.vstelm.b"]
962-
fn __lsx_vstelm_b(a: v16i8, b: *mut i8, c: i32, d: u32) ;
970+
fn __lsx_vstelm_b(a: v16i8, b: *mut i8, c: i32, d: u32);
963971
#[link_name = "llvm.loongarch.lsx.vstelm.h"]
964-
fn __lsx_vstelm_h(a: v8i16, b: *mut i8, c: i32, d: u32) ;
972+
fn __lsx_vstelm_h(a: v8i16, b: *mut i8, c: i32, d: u32);
965973
#[link_name = "llvm.loongarch.lsx.vstelm.w"]
966-
fn __lsx_vstelm_w(a: v4i32, b: *mut i8, c: i32, d: u32) ;
974+
fn __lsx_vstelm_w(a: v4i32, b: *mut i8, c: i32, d: u32);
967975
#[link_name = "llvm.loongarch.lsx.vstelm.d"]
968-
fn __lsx_vstelm_d(a: v2i64, b: *mut i8, c: i32, d: u32) ;
976+
fn __lsx_vstelm_d(a: v2i64, b: *mut i8, c: i32, d: u32);
969977
#[link_name = "llvm.loongarch.lsx.vaddwev.d.w"]
970978
fn __lsx_vaddwev_d_w(a: v4i32, b: v4i32) -> v2i64;
971979
#[link_name = "llvm.loongarch.lsx.vaddwev.w.h"]
@@ -1301,7 +1309,7 @@ extern "unadjusted" {
13011309
#[link_name = "llvm.loongarch.lsx.vld"]
13021310
fn __lsx_vld(a: *const i8, b: i32) -> v16i8;
13031311
#[link_name = "llvm.loongarch.lsx.vst"]
1304-
fn __lsx_vst(a: v16i8, b: *mut i8, c: i32) ;
1312+
fn __lsx_vst(a: v16i8, b: *mut i8, c: i32);
13051313
#[link_name = "llvm.loongarch.lsx.vssrlrn.b.h"]
13061314
fn __lsx_vssrlrn_b_h(a: v8i16, b: v8i16) -> v16i8;
13071315
#[link_name = "llvm.loongarch.lsx.vssrlrn.h.w"]
@@ -1323,7 +1331,7 @@ extern "unadjusted" {
13231331
#[link_name = "llvm.loongarch.lsx.vldx"]
13241332
fn __lsx_vldx(a: *const i8, b: i64) -> v16i8;
13251333
#[link_name = "llvm.loongarch.lsx.vstx"]
1326-
fn __lsx_vstx(a: v16i8, b: *mut i8, c: i64) ;
1334+
fn __lsx_vstx(a: v16i8, b: *mut i8, c: i64);
13271335
#[link_name = "llvm.loongarch.lsx.vextl.qu.du"]
13281336
fn __lsx_vextl_qu_du(a: v2u64) -> v2u64;
13291337
#[link_name = "llvm.loongarch.lsx.bnz.b"]
@@ -4068,6 +4076,34 @@ pub unsafe fn lsx_vfrecip_d(a: v2f64) -> v2f64 {
40684076
__lsx_vfrecip_d(a)
40694077
}
40704078

4079+
/// Thin wrapper that forwards `a` to the `__lsx_vfrecipe_s` binding
/// (linked as `llvm.loongarch.lsx.vfrecipe.s`) and returns its result.
///
/// Going by the `frecipe` name, this is presumably a per-lane single-precision
/// reciprocal *estimate*; the accuracy contract is not visible here, so confirm
/// it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lsx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
4080+
#[target_feature(enable = "lsx,frecipe")]
4081+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
4082+
pub unsafe fn lsx_vfrecipe_s(a: v4f32) -> v4f32 {
4083+
__lsx_vfrecipe_s(a)
4084+
}
4085+
4086+
/// Thin wrapper that forwards `a` to the `__lsx_vfrecipe_d` binding
/// (linked as `llvm.loongarch.lsx.vfrecipe.d`) and returns its result.
///
/// Going by the `frecipe` name, this is presumably a per-lane double-precision
/// reciprocal *estimate*; the accuracy contract is not visible here, so confirm
/// it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lsx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
4087+
#[target_feature(enable = "lsx,frecipe")]
4088+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
4089+
pub unsafe fn lsx_vfrecipe_d(a: v2f64) -> v2f64 {
4090+
__lsx_vfrecipe_d(a)
4091+
}
4092+
4093+
/// Thin wrapper that forwards `a` to the `__lsx_vfrsqrte_s` binding
/// (linked as `llvm.loongarch.lsx.vfrsqrte.s`) and returns its result.
///
/// Going by the `frsqrte` name, this is presumably a per-lane single-precision
/// reciprocal-square-root *estimate*; the accuracy contract is not visible
/// here, so confirm it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lsx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
4094+
#[target_feature(enable = "lsx,frecipe")]
4095+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
4096+
pub unsafe fn lsx_vfrsqrte_s(a: v4f32) -> v4f32 {
4097+
__lsx_vfrsqrte_s(a)
4098+
}
4099+
4100+
/// Thin wrapper that forwards `a` to the `__lsx_vfrsqrte_d` binding
/// (linked as `llvm.loongarch.lsx.vfrsqrte.d`) and returns its result.
///
/// Going by the `frsqrte` name, this is presumably a per-lane double-precision
/// reciprocal-square-root *estimate*; the accuracy contract is not visible
/// here, so confirm it against the LoongArch ISA manual.
///
/// # Safety
///
/// Requires the `lsx` and `frecipe` target features; the caller must ensure
/// the running CPU supports both.
#[inline]
4101+
#[target_feature(enable = "lsx,frecipe")]
4102+
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
4103+
pub unsafe fn lsx_vfrsqrte_d(a: v2f64) -> v2f64 {
4104+
__lsx_vfrsqrte_d(a)
4105+
}
4106+
40714107
#[inline]
40724108
#[target_feature(enable = "lsx")]
40734109
#[unstable(feature = "stdarch_loongarch", issue = "117427")]

crates/core_arch/src/loongarch64/lsx/tests.rs

+32
Original file line number | Diff line number | Diff line change
@@ -3308,6 +3308,38 @@ unsafe fn test_lsx_vfrecip_d() {
33083308
assert_eq!(r, transmute(lsx_vfrecip_d(transmute(a))));
33093309
}
33103310

3311+
// Bit-exact check of `lsx_vfrecipe_s`: the four input lanes are given as raw
// u32 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x2` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lsx,frecipe")]
3312+
unsafe fn test_lsx_vfrecipe_s() {
3313+
let a = u32x4::new(1057583779, 1062308847, 1060089100, 1048454688);
3314+
let r = i64x2::new(4583644530211711115, 4647978179615164140);
3315+
3316+
assert_eq!(r, transmute(lsx_vfrecipe_s(transmute(a))));
3317+
}
3318+
3319+
// Bit-exact check of `lsx_vfrecipe_d`: the two input lanes are given as raw
// u64 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x2` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lsx,frecipe")]
3320+
unsafe fn test_lsx_vfrecipe_d() {
3321+
let a = u64x2::new(4605515926442181274, 4605369703273365674);
3322+
let r = i64x2::new(4608204937770303488, 4608317161507651584);
3323+
3324+
assert_eq!(r, transmute(lsx_vfrecipe_d(transmute(a))));
3325+
}
3326+
3327+
// Bit-exact check of `lsx_vfrsqrte_s`: the four input lanes are given as raw
// u32 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x2` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lsx,frecipe")]
3328+
unsafe fn test_lsx_vfrsqrte_s() {
3329+
let a = u32x4::new(1064377488, 1055815904, 1056897740, 1064016656);
3330+
let r = i64x2::new(4592421282989204764, 4577184195020153336);
3331+
3332+
assert_eq!(r, transmute(lsx_vfrsqrte_s(transmute(a))));
3333+
}
3334+
3335+
// Bit-exact check of `lsx_vfrsqrte_d`: the two input lanes are given as raw
// u64 bit patterns and transmuted into the intrinsic's argument type; the
// result is transmuted to `i64x2` and compared against hard-coded expected bits.
// NOTE(review): expected values are presumably captured from reference
// hardware/emulation — confirm their provenance before changing them.
#[simd_test(enable = "lsx,frecipe")]
3336+
unsafe fn test_lsx_vfrsqrte_d() {
3337+
let a = u64x2::new(4602766865443628663, 4605323203937791867);
3338+
let r = i64x2::new(4608986772678901760, 4607734355383549952);
3339+
3340+
assert_eq!(r, transmute(lsx_vfrsqrte_d(transmute(a))));
3341+
}
3342+
33113343
#[simd_test(enable = "lsx")]
33123344
unsafe fn test_lsx_vfrint_s() {
33133345
let a = u32x4::new(1062138521, 1056849108, 1034089720, 1038314384);

0 commit comments

Comments
 (0)