diff --git a/kernels/portable/cpu/op_acos.cpp b/kernels/portable/cpu/op_acos.cpp index dac3b1546f3..bca315e642d 100644 --- a/kernels/portable/cpu/op_acos.cpp +++ b/kernels/portable/cpu/op_acos.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& acos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::acos, ctx, in, out); + static constexpr const char op_name[] = "acos.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::acos(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_acosh.cpp b/kernels/portable/cpu/op_acosh.cpp index 77f7edf4c5d..df6af2a449e 100644 --- a/kernels/portable/cpu/op_acosh.cpp +++ b/kernels/portable/cpu/op_acosh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& acosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::acosh, ctx, in, out); + static constexpr const char op_name[] = "acosh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::acosh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_asin.cpp b/kernels/portable/cpu/op_asin.cpp index 6affa6e4122..56c47988674 100644 --- a/kernels/portable/cpu/op_asin.cpp +++ b/kernels/portable/cpu/op_asin.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& asin_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::asin, ctx, in, out); + static constexpr const char op_name[] = "asin.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::asin(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_asinh.cpp b/kernels/portable/cpu/op_asinh.cpp index bce8dcf6d5a..5bbf6e539e0 100644 --- a/kernels/portable/cpu/op_asinh.cpp +++ 
b/kernels/portable/cpu/op_asinh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& asinh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::asinh, ctx, in, out); + static constexpr const char op_name[] = "asinh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::asinh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_atan.cpp b/kernels/portable/cpu/op_atan.cpp index 23549627a3b..ad1fdf25391 100644 --- a/kernels/portable/cpu/op_atan.cpp +++ b/kernels/portable/cpu/op_atan.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& atan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::atan, ctx, in, out); + static constexpr const char op_name[] = "atan.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::atan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_atanh.cpp b/kernels/portable/cpu/op_atanh.cpp index 13e6e8ca141..03a6d627dfb 100644 --- a/kernels/portable/cpu/op_atanh.cpp +++ b/kernels/portable/cpu/op_atanh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& atanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::atanh, ctx, in, out); + static constexpr const char op_name[] = "atanh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::atanh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_ceil.cpp b/kernels/portable/cpu/op_ceil.cpp index e2d9f9d6029..1090aa842be 100644 --- a/kernels/portable/cpu/op_ceil.cpp +++ b/kernels/portable/cpu/op_ceil.cpp @@ -17,7 +17,9 @@ namespace native { using executorch::aten::Tensor; Tensor& ceil_out(KernelRuntimeContext& ctx, const Tensor& in, 
Tensor& out) { - return internal::unary_ufunc_realh(std::ceil, ctx, in, out); + static constexpr const char op_name[] = "ceil.out"; + return internal::unary_ufunc_realh( + [](auto x) { return std::ceil(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_cos.cpp b/kernels/portable/cpu/op_cos.cpp index e536060d162..ae779cb6868 100644 --- a/kernels/portable/cpu/op_cos.cpp +++ b/kernels/portable/cpu/op_cos.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& cos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::cos, ctx, in, out); + static constexpr const char op_name[] = "cos.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::cos(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_cosh.cpp b/kernels/portable/cpu/op_cosh.cpp index e622bbe6fcd..e221e0d5c0c 100644 --- a/kernels/portable/cpu/op_cosh.cpp +++ b/kernels/portable/cpu/op_cosh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& cosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::cosh, ctx, in, out); + static constexpr const char op_name[] = "cosh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::cosh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_erf.cpp b/kernels/portable/cpu/op_erf.cpp index 6897bcda95b..5ad9590906b 100644 --- a/kernels/portable/cpu/op_erf.cpp +++ b/kernels/portable/cpu/op_erf.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& erf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::erf, ctx, in, out); + static constexpr const char op_name[] = "erf.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return 
std::erf(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_exp.cpp b/kernels/portable/cpu/op_exp.cpp index cbfc8924cb0..52a6da016ac 100644 --- a/kernels/portable/cpu/op_exp.cpp +++ b/kernels/portable/cpu/op_exp.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::exp, ctx, in, out); + static constexpr const char op_name[] = "exp.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::exp(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_expm1.cpp b/kernels/portable/cpu/op_expm1.cpp index f2d49f615b1..22cd9aace24 100644 --- a/kernels/portable/cpu/op_expm1.cpp +++ b/kernels/portable/cpu/op_expm1.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& expm1_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::expm1, ctx, in, out); + static constexpr const char op_name[] = "expm1.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::expm1(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_floor.cpp b/kernels/portable/cpu/op_floor.cpp index f389ef06a79..be4ad2ea3ec 100644 --- a/kernels/portable/cpu/op_floor.cpp +++ b/kernels/portable/cpu/op_floor.cpp @@ -17,7 +17,9 @@ namespace native { using executorch::aten::Tensor; Tensor& floor_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realh(std::floor, ctx, in, out); + static constexpr const char op_name[] = "floor.out"; + return internal::unary_ufunc_realh( + [](auto x) { return std::floor(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_isinf.cpp b/kernels/portable/cpu/op_isinf.cpp index 92d1e563a2e..42798231a84 100644 --- 
a/kernels/portable/cpu/op_isinf.cpp +++ b/kernels/portable/cpu/op_isinf.cpp @@ -17,8 +17,9 @@ namespace native { Tensor& isinf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { // Lambda is syntactic sugar needed to workaround compilation on some older // non-compatible distros where isnan is returning int rather than bool - return internal::unary_ufunc_realhb_to_bool( - [](double x) -> bool { return std::isinf(x); }, ctx, in, out); + static constexpr const char op_name[] = "isinf.out"; + return internal::unary_ufunc_realhb_to_bool( + [](auto x) -> bool { return std::isinf(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_isnan.cpp b/kernels/portable/cpu/op_isnan.cpp index 51e189992ee..817d314fd2b 100644 --- a/kernels/portable/cpu/op_isnan.cpp +++ b/kernels/portable/cpu/op_isnan.cpp @@ -17,8 +17,9 @@ namespace native { Tensor& isnan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { // Lambda is syntactic sugar needed to workaround compilation on some older // non-compatible distros where isnan is returning int rather than bool - return internal::unary_ufunc_realhb_to_bool( - [](double x) -> bool { return std::isnan(x); }, ctx, in, out); + static constexpr const char op_name[] = "isnan.out"; + return internal::unary_ufunc_realhb_to_bool( + [](auto x) -> bool { return std::isnan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log.cpp b/kernels/portable/cpu/op_log.cpp index 8a36bce8c49..d47c6bf0acb 100644 --- a/kernels/portable/cpu/op_log.cpp +++ b/kernels/portable/cpu/op_log.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& log_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::log, ctx, in, out); + static constexpr const char op_name[] = "log.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log(x); }, ctx, in, out); } } // namespace native 
diff --git a/kernels/portable/cpu/op_log10.cpp b/kernels/portable/cpu/op_log10.cpp index 89f9b672476..39f17cdda88 100644 --- a/kernels/portable/cpu/op_log10.cpp +++ b/kernels/portable/cpu/op_log10.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log10_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log10, ctx, in, out); + static constexpr const char op_name[] = "log10.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log10(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log1p.cpp b/kernels/portable/cpu/op_log1p.cpp index 2daa31e37ff..8cc1052081f 100644 --- a/kernels/portable/cpu/op_log1p.cpp +++ b/kernels/portable/cpu/op_log1p.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log1p_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log1p, ctx, in, out); + static constexpr const char op_name[] = "log1p.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log1p(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log2.cpp b/kernels/portable/cpu/op_log2.cpp index 4d7406832e4..ec85bcbc24f 100644 --- a/kernels/portable/cpu/op_log2.cpp +++ b/kernels/portable/cpu/op_log2.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log2_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log2, ctx, in, out); + static constexpr const char op_name[] = "log2.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log2(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_reciprocal.cpp b/kernels/portable/cpu/op_reciprocal.cpp index f22f9883858..c1ebd96ae4e 100644 --- 
a/kernels/portable/cpu/op_reciprocal.cpp +++ b/kernels/portable/cpu/op_reciprocal.cpp @@ -14,16 +14,27 @@ namespace executor { namespace native { namespace { +float reciprocal(float x) { + return 1.0f / x; +} + double reciprocal(double x) { return 1.0 / x; } +template < + typename Integer, + std::enable_if_t<std::is_integral_v<Integer>, bool> = true> +double reciprocal(Integer x) { + return reciprocal((double)x); +} } // namespace Tensor& reciprocal_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - reciprocal, ctx, in, out); + static constexpr const char op_name[] = "reciprocal.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>( + [](auto x) { return reciprocal(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_rsqrt.cpp b/kernels/portable/cpu/op_rsqrt.cpp index 19c4c6c1a57..628b72e9f19 100644 --- a/kernels/portable/cpu/op_rsqrt.cpp +++ b/kernels/portable/cpu/op_rsqrt.cpp @@ -12,16 +12,17 @@ namespace torch { namespace executor { namespace native { -namespace { - -double rsqrt(double x) { - return 1.0 / std::sqrt(x); -} - -} // namespace Tensor& rsqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(rsqrt, ctx, in, out); + static constexpr const char op_name[] = "rsqrt.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>( + [](auto x) { + auto result = std::sqrt(x); + return static_cast<decltype(result)>(1) / result; + }, + ctx, + in, + out); } } // namespace native diff --git a/kernels/portable/cpu/op_sin.cpp b/kernels/portable/cpu/op_sin.cpp index ad65c4be18b..85d43abd62f 100644 --- a/kernels/portable/cpu/op_sin.cpp +++ b/kernels/portable/cpu/op_sin.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& sin_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::sin, ctx, in, out); + static constexpr const char op_name[] = "sin.out"; 
+ return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::sin(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_sinh.cpp b/kernels/portable/cpu/op_sinh.cpp index 21666392392..9e75e66ea56 100644 --- a/kernels/portable/cpu/op_sinh.cpp +++ b/kernels/portable/cpu/op_sinh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& sinh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::sinh, ctx, in, out); + static constexpr const char op_name[] = "sinh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::sinh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_sqrt.cpp b/kernels/portable/cpu/op_sqrt.cpp index bd2075f5b04..c80eee203f2 100644 --- a/kernels/portable/cpu/op_sqrt.cpp +++ b/kernels/portable/cpu/op_sqrt.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& sqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::sqrt, ctx, in, out); + static constexpr const char op_name[] = "sqrt.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::sqrt(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_tan.cpp b/kernels/portable/cpu/op_tan.cpp index a2b921d5146..37f241745c5 100644 --- a/kernels/portable/cpu/op_tan.cpp +++ b/kernels/portable/cpu/op_tan.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& tan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::tan, ctx, in, out); + static constexpr const char op_name[] = "tan.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::tan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_tanh.cpp 
b/kernels/portable/cpu/op_tanh.cpp index ae9f93dc62c..d0b0597abaa 100644 --- a/kernels/portable/cpu/op_tanh.cpp +++ b/kernels/portable/cpu/op_tanh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& tanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::tanh, ctx, in, out); + static constexpr const char op_name[] = "tanh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::tanh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_trunc.cpp b/kernels/portable/cpu/op_trunc.cpp index a14a2b18e2f..284099b494b 100644 --- a/kernels/portable/cpu/op_trunc.cpp +++ b/kernels/portable/cpu/op_trunc.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& trunc_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realh(std::trunc, ctx, in, out); + static constexpr const char op_name[] = "trunc.out"; + return internal::unary_ufunc_realh( + [](auto x) { return std::trunc(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/pattern/pattern.h b/kernels/portable/cpu/pattern/pattern.h index 83d4db92a1e..eae8a746d0e 100644 --- a/kernels/portable/cpu/pattern/pattern.h +++ b/kernels/portable/cpu/pattern/pattern.h @@ -46,6 +46,7 @@ question is a bit more specific, then add a descriptive sufix. */ #pragma once +#include #include namespace torch { @@ -53,29 +54,70 @@ namespace executor { namespace native { namespace internal { +// Implementation detail for the other helpers in this header. Returns +// true on success, false on failure. 
+bool check_and_resize_inputs( + KernelRuntimeContext& ctx, + const Tensor& in, + Tensor& out); + /** * Implements an op pattern for ops that take a single input tensor of any - * realh dtye, no additional arguments, and outputs a tensor of the same size + * realh dtype, no additional arguments, and outputs a tensor of the same size * and dtype. The function fn specifies the math operation which is applied to * the input tensor element-wise. */ +template <const char* op_name, typename Op> Tensor& unary_ufunc_realh( - double (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + ET_KERNEL_CHECK( + ctx, tensors_have_same_shape_and_dtype(in, out), InvalidArgument, out); + + ET_SWITCH_REALH_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&] { + utils::apply_unitensor_elementwise_fn<CTYPE, op_name>( + fn, + ctx, + in, + utils::SupportedTensorDtypes::REALH, + out, + utils::SupportedTensorDtypes::SAME_AS_COMMON); + }); + return out; +} /** * Implements an op pattern for ops that take a single input tensor of any - * realhb dtye (real, half and boolean), no additional arguments, and outputs a + * realhb dtype (real, half and boolean), no additional arguments, and outputs a * boolean tensor of the same size. The function fn specifies the math * operation which is applied to the input tensor element-wise. 
*/ +template <const char* op_name, typename Op> Tensor& unary_ufunc_realhb_to_bool( - bool (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] { + utils::apply_unitensor_elementwise_fn<CTYPE_IN, op_name>( + [fn](const CTYPE_IN val_in) { return fn(val_in); }, + ctx, + in, + utils::SupportedTensorDtypes::REALHBBF16, + out, + utils::SupportedTensorDtypes::BOOL); + }); + + return out; +} /** * Implements an op pattern for ops that take a single input tensor of any @@ -83,11 +125,30 @@ Tensor& unary_ufunc_realhb_to_bool( * outputs a floating point tensor of the same size. The function fn specifies * the math operation which is applied to the input tensor element-wise. */ +template <const char* op_name, typename Op> Tensor& unary_ufunc_realhbbf16_to_floathbf16( - double (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); + + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + + ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] { + utils::apply_unitensor_elementwise_fn<CTYPE_IN, op_name>( + [fn](const CTYPE_IN val_in) { return fn(val_in); }, + ctx, + in, + utils::SupportedTensorDtypes::REALHBBF16, + out, + utils::SupportedTensorDtypes::FLOATHBF16); + }); + + return out; +} } // namespace internal } // namespace native diff --git a/kernels/portable/cpu/pattern/targets.bzl b/kernels/portable/cpu/pattern/targets.bzl index 1de3035776e..7330d9e4570 100644 --- a/kernels/portable/cpu/pattern/targets.bzl +++ b/kernels/portable/cpu/pattern/targets.bzl @@ -50,17 +50,17 @@ def define_common_targets(): runtime.cxx_library( name = "pattern", srcs = [ - "unary_ufunc_realhb_to_bool.cpp", - "unary_ufunc_realhbbf16_to_floathbf16.cpp", - "unary_ufunc_realh.cpp", + "pattern.cpp", ], exported_headers = [ "pattern.h", ], compiler_flags = 
["-Wno-missing-prototypes"], + exported_deps = [ + "//executorch/kernels/portable/cpu/util:elementwise_util", + ], deps = [ "//executorch/kernels/portable/cpu/util:broadcast_util", - "//executorch/kernels/portable/cpu/util:functional_util", "//executorch/runtime/kernel:kernel_includes", ], visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/..."], diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realh.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realh.cpp deleted file mode 100644 index 16d847ace31..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realh.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realh( - double (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_shape_and_dtype(in, out), InvalidArgument, out); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - ET_SWITCH_REALH_TYPES(in.scalar_type(), ctx, __func__, CTYPE, [&] { - apply_unary_map_fn( - [fn](const CTYPE val_in) { return static_cast(fn(val_in)); }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp deleted file mode 100644 index 
367137ad02c..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realhb_to_bool( - bool (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK_MSG( - ctx, - out.scalar_type() == executorch::aten::ScalarType::Bool, - InvalidArgument, - out, - "Expected out tensor to have dtype Bool, but got %" PRId8 " instead.", - static_cast(out.scalar_type())); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - const auto in_type = in.scalar_type(); - - ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, __func__, CTYPE_IN, [&] { - apply_unary_map_fn( - [fn](const CTYPE_IN val_in) { return fn(val_in); }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp deleted file mode 100644 index 602b5b1bfd2..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realhbbf16_to_floathbf16( - double (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - const auto in_type = in.scalar_type(); - const auto out_type = out.scalar_type(); - - ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, __func__, CTYPE_IN, [&] { - ET_SWITCH_FLOATHBF16_TYPES(out_type, ctx, __func__, CTYPE_OUT, [&] { - apply_unary_map_fn( - [fn](const CTYPE_IN val_in) { - CTYPE_OUT xi = static_cast(val_in); - return static_cast(fn(xi)); - }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/util/dtype_util.cpp b/kernels/portable/cpu/util/dtype_util.cpp index d240b9f83bc..81b1b203a54 100644 --- a/kernels/portable/cpu/util/dtype_util.cpp +++ b/kernels/portable/cpu/util/dtype_util.cpp @@ -23,10 +23,14 @@ bool check_tensor_dtype( return executorch::runtime::tensor_is_realhbbf16_type(t); case SupportedTensorDtypes::REALHBF16: return executorch::runtime::tensor_is_realhbf16_type(t); + case SupportedTensorDtypes::REALH: + return executorch::runtime::tensor_is_realh_type(t); case SupportedTensorDtypes::FLOATHBF16: return executorch::runtime::tensor_is_floating_type(t); case SupportedTensorDtypes::INTB: return 
executorch::runtime::tensor_is_integral_type(t, true); + case SupportedTensorDtypes::BOOL: + return executorch::runtime::tensor_is_type(t, ScalarType::Bool); case SupportedTensorDtypes::BOOL_OR_BYTE: return (executorch::runtime::tensor_is_type( t, ScalarType::Bool, ScalarType::Byte)); diff --git a/kernels/portable/cpu/util/dtype_util.h b/kernels/portable/cpu/util/dtype_util.h index 1f0e3403e82..ae9e9d31df5 100644 --- a/kernels/portable/cpu/util/dtype_util.h +++ b/kernels/portable/cpu/util/dtype_util.h @@ -51,6 +51,15 @@ load_to_common_fn get_load_to_common_fn_realhbf16( return result; } +template +load_to_common_fn get_load_to_common_fn_realh(const Tensor& t) { + CTYPE_COMMON (*result)(const void*) = nullptr; + ET_SWITCH_REALH_TYPES(t.scalar_type(), unused, op_name, TENSOR_CTYPE, [&]() { + result = internal::load_and_convert; + }); + return result; +} + template load_to_common_fn get_load_to_common_fn_floathbf16( const Tensor& t) { @@ -72,6 +81,16 @@ load_to_common_fn get_load_to_common_fn_intb(const Tensor& t) { return result; } +template +load_to_common_fn get_load_to_common_fn_bool(const Tensor& t) { + ET_CHECK_MSG( + t.scalar_type() == ScalarType::Bool, + "Unhandled dtype %s for %s", + ::executorch::runtime::toString(t.scalar_type()), + op_name); + return internal::load_and_convert; +} + template load_to_common_fn get_load_to_common_fn_bool_or_byte( const Tensor& t) { @@ -137,6 +156,16 @@ store_common_to_tensor_fn get_store_common_to_tensor_fn_realhbf16( return result; } +template +store_common_to_tensor_fn get_store_common_to_tensor_fn_realh( + const Tensor& t) { + void (*result)(CTYPE_COMMON, void*) = nullptr; + ET_SWITCH_REALH_TYPES(t.scalar_type(), unused, op_name, TENSOR_CTYPE, [&]() { + result = internal::convert_and_store; + }); + return result; +} + template store_common_to_tensor_fn get_store_common_to_tensor_fn_floathbf16(const Tensor& t) { @@ -159,6 +188,17 @@ store_common_to_tensor_fn get_store_common_to_tensor_fn_intb( return result; } +template 
+store_common_to_tensor_fn get_store_common_to_tensor_fn_bool( + const Tensor& t) { + ET_CHECK_MSG( + t.scalar_type() == ScalarType::Bool, + "Unhandled dtype %s for %s", + ::executorch::runtime::toString(t.scalar_type()), + op_name); + return internal::convert_and_store; +} + template store_common_to_tensor_fn get_store_common_to_tensor_fn_bool_or_byte(const Tensor& t) { @@ -191,8 +231,10 @@ get_store_common_to_tensor_fn_same_as_common(const Tensor& t) { enum class SupportedTensorDtypes { REALHBBF16, REALHBF16, + REALH, FLOATHBF16, INTB, + BOOL, BOOL_OR_BYTE, SAME_AS_COMPUTE, SAME_AS_COMMON, @@ -209,10 +251,14 @@ load_to_common_fn get_load_to_common_fn( return get_load_to_common_fn_realhbbf16(t); case SupportedTensorDtypes::REALHBF16: return get_load_to_common_fn_realhbf16(t); + case SupportedTensorDtypes::REALH: + return get_load_to_common_fn_realh(t); case SupportedTensorDtypes::FLOATHBF16: return get_load_to_common_fn_realhbf16(t); case SupportedTensorDtypes::INTB: return get_load_to_common_fn_intb(t); + case SupportedTensorDtypes::BOOL: + return get_load_to_common_fn_bool(t); case SupportedTensorDtypes::BOOL_OR_BYTE: return get_load_to_common_fn_bool_or_byte(t); case SupportedTensorDtypes::SAME_AS_COMPUTE: @@ -233,10 +279,14 @@ store_common_to_tensor_fn get_store_common_to_tensor_fn( return get_store_common_to_tensor_fn_realhbbf16(t); case SupportedTensorDtypes::REALHBF16: return get_store_common_to_tensor_fn_realhbf16(t); + case SupportedTensorDtypes::REALH: + return get_store_common_to_tensor_fn_realh(t); case SupportedTensorDtypes::FLOATHBF16: return get_store_common_to_tensor_fn_floathbf16(t); case SupportedTensorDtypes::INTB: return get_store_common_to_tensor_fn_intb(t); + case SupportedTensorDtypes::BOOL: + return get_store_common_to_tensor_fn_bool(t); case SupportedTensorDtypes::BOOL_OR_BYTE: return get_store_common_to_tensor_fn_bool_or_byte( t); diff --git a/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h 
b/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h index 6e49dd9e57b..64a52f47005 100644 --- a/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h +++ b/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h @@ -73,19 +73,17 @@ class UnaryUfuncRealHBBF16ToFloatHBF16Test : public OperatorTest { auto expected = tf_out.make({1, 6}, expected_vector); if (IN_DTYPE == ScalarType::BFloat16 || OUT_DTYPE == ScalarType::BFloat16) { double rtol = executorch::runtime::testing::internal::kDefaultRtol; - // It appears we need a higher tolerance for at least some ATen - // tests, like aten_op_acosh_test. - if (get_supported_features()->is_aten) { - rtol = 3e-3; - } + // Raise tolerance because both we and ATen run these + // computations at internal float32 precision rather than + // float64. + rtol = 3e-3; EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, rtol, executorch::runtime::testing::internal::kDefaultBFloat16Atol); } else if (IN_DTYPE == ScalarType::Half || OUT_DTYPE == ScalarType::Half) { double rtol = executorch::runtime::testing::internal::kDefaultRtol; - // It appears we need a higher tolerance for at least some ATen - // tests, like aten_op_acosh_test. - if (get_supported_features()->is_aten) { - rtol = 1e-3; - } + // Raise tolerance because both we and ATen run these + // computations at internal float32 precision rather than + // float64. + rtol = 1e-3; EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, rtol, executorch::runtime::testing::internal::kDefaultHalfAtol); } else { EXPECT_TENSOR_CLOSE(out, expected);