diff --git a/kernels/portable/cpu/op_acos.cpp b/kernels/portable/cpu/op_acos.cpp index dac3b1546f3..bca315e642d 100644 --- a/kernels/portable/cpu/op_acos.cpp +++ b/kernels/portable/cpu/op_acos.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& acos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::acos, ctx, in, out); + static constexpr const char op_name[] = "acos.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::acos(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_acosh.cpp b/kernels/portable/cpu/op_acosh.cpp index 77f7edf4c5d..df6af2a449e 100644 --- a/kernels/portable/cpu/op_acosh.cpp +++ b/kernels/portable/cpu/op_acosh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& acosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::acosh, ctx, in, out); + static constexpr const char op_name[] = "acosh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::acosh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_asin.cpp b/kernels/portable/cpu/op_asin.cpp index 6affa6e4122..56c47988674 100644 --- a/kernels/portable/cpu/op_asin.cpp +++ b/kernels/portable/cpu/op_asin.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& asin_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::asin, ctx, in, out); + static constexpr const char op_name[] = "asin.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::asin(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_asinh.cpp b/kernels/portable/cpu/op_asinh.cpp index bce8dcf6d5a..5bbf6e539e0 100644 --- a/kernels/portable/cpu/op_asinh.cpp +++ 
b/kernels/portable/cpu/op_asinh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& asinh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::asinh, ctx, in, out); + static constexpr const char op_name[] = "asinh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::asinh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_atan.cpp b/kernels/portable/cpu/op_atan.cpp index 23549627a3b..ad1fdf25391 100644 --- a/kernels/portable/cpu/op_atan.cpp +++ b/kernels/portable/cpu/op_atan.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& atan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::atan, ctx, in, out); + static constexpr const char op_name[] = "atan.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::atan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_atanh.cpp b/kernels/portable/cpu/op_atanh.cpp index 13e6e8ca141..03a6d627dfb 100644 --- a/kernels/portable/cpu/op_atanh.cpp +++ b/kernels/portable/cpu/op_atanh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& atanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::atanh, ctx, in, out); + static constexpr const char op_name[] = "atanh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::atanh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_ceil.cpp b/kernels/portable/cpu/op_ceil.cpp index e2d9f9d6029..1090aa842be 100644 --- a/kernels/portable/cpu/op_ceil.cpp +++ b/kernels/portable/cpu/op_ceil.cpp @@ -17,7 +17,9 @@ namespace native { using executorch::aten::Tensor; Tensor& ceil_out(KernelRuntimeContext& ctx, const Tensor& in, 
Tensor& out) { - return internal::unary_ufunc_realh(std::ceil, ctx, in, out); + static constexpr const char op_name[] = "ceil.out"; + return internal::unary_ufunc_realh( + [](auto x) { return std::ceil(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_cos.cpp b/kernels/portable/cpu/op_cos.cpp index e536060d162..ae779cb6868 100644 --- a/kernels/portable/cpu/op_cos.cpp +++ b/kernels/portable/cpu/op_cos.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& cos_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::cos, ctx, in, out); + static constexpr const char op_name[] = "cos.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::cos(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_cosh.cpp b/kernels/portable/cpu/op_cosh.cpp index e622bbe6fcd..e221e0d5c0c 100644 --- a/kernels/portable/cpu/op_cosh.cpp +++ b/kernels/portable/cpu/op_cosh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& cosh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::cosh, ctx, in, out); + static constexpr const char op_name[] = "cosh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::cosh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_erf.cpp b/kernels/portable/cpu/op_erf.cpp index 6897bcda95b..5ad9590906b 100644 --- a/kernels/portable/cpu/op_erf.cpp +++ b/kernels/portable/cpu/op_erf.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& erf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::erf, ctx, in, out); + static constexpr const char op_name[] = "erf.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return 
std::erf(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_exp.cpp b/kernels/portable/cpu/op_exp.cpp index cbfc8924cb0..52a6da016ac 100644 --- a/kernels/portable/cpu/op_exp.cpp +++ b/kernels/portable/cpu/op_exp.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& exp_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::exp, ctx, in, out); + static constexpr const char op_name[] = "exp.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::exp(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_expm1.cpp b/kernels/portable/cpu/op_expm1.cpp index f2d49f615b1..22cd9aace24 100644 --- a/kernels/portable/cpu/op_expm1.cpp +++ b/kernels/portable/cpu/op_expm1.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& expm1_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::expm1, ctx, in, out); + static constexpr const char op_name[] = "expm1.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::expm1(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_floor.cpp b/kernels/portable/cpu/op_floor.cpp index f389ef06a79..be4ad2ea3ec 100644 --- a/kernels/portable/cpu/op_floor.cpp +++ b/kernels/portable/cpu/op_floor.cpp @@ -17,7 +17,9 @@ namespace native { using executorch::aten::Tensor; Tensor& floor_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realh(std::floor, ctx, in, out); + static constexpr const char op_name[] = "floor.out"; + return internal::unary_ufunc_realh( + [](auto x) { return std::floor(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_isinf.cpp b/kernels/portable/cpu/op_isinf.cpp index 92d1e563a2e..42798231a84 100644 --- 
a/kernels/portable/cpu/op_isinf.cpp +++ b/kernels/portable/cpu/op_isinf.cpp @@ -17,8 +17,9 @@ namespace native { Tensor& isinf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { // Lambda is syntactic sugar needed to workaround compilation on some older // non-compatible distros where isnan is returning int rather than bool - return internal::unary_ufunc_realhb_to_bool( - [](double x) -> bool { return std::isinf(x); }, ctx, in, out); + static constexpr const char op_name[] = "isinf.out"; + return internal::unary_ufunc_realhb_to_bool( + [](auto x) -> bool { return std::isinf(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_isnan.cpp b/kernels/portable/cpu/op_isnan.cpp index 51e189992ee..817d314fd2b 100644 --- a/kernels/portable/cpu/op_isnan.cpp +++ b/kernels/portable/cpu/op_isnan.cpp @@ -17,8 +17,9 @@ namespace native { Tensor& isnan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { // Lambda is syntactic sugar needed to workaround compilation on some older // non-compatible distros where isnan is returning int rather than bool - return internal::unary_ufunc_realhb_to_bool( - [](double x) -> bool { return std::isnan(x); }, ctx, in, out); + static constexpr const char op_name[] = "isnan.out"; + return internal::unary_ufunc_realhb_to_bool( + [](auto x) -> bool { return std::isnan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log.cpp b/kernels/portable/cpu/op_log.cpp index 8a36bce8c49..d47c6bf0acb 100644 --- a/kernels/portable/cpu/op_log.cpp +++ b/kernels/portable/cpu/op_log.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& log_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::log, ctx, in, out); + static constexpr const char op_name[] = "log.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log(x); }, ctx, in, out); } } // namespace native 
diff --git a/kernels/portable/cpu/op_log10.cpp b/kernels/portable/cpu/op_log10.cpp index 89f9b672476..39f17cdda88 100644 --- a/kernels/portable/cpu/op_log10.cpp +++ b/kernels/portable/cpu/op_log10.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log10_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log10, ctx, in, out); + static constexpr const char op_name[] = "log10.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log10(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log1p.cpp b/kernels/portable/cpu/op_log1p.cpp index 2daa31e37ff..8cc1052081f 100644 --- a/kernels/portable/cpu/op_log1p.cpp +++ b/kernels/portable/cpu/op_log1p.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log1p_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log1p, ctx, in, out); + static constexpr const char op_name[] = "log1p.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log1p(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_log2.cpp b/kernels/portable/cpu/op_log2.cpp index 4d7406832e4..ec85bcbc24f 100644 --- a/kernels/portable/cpu/op_log2.cpp +++ b/kernels/portable/cpu/op_log2.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& log2_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::log2, ctx, in, out); + static constexpr const char op_name[] = "log2.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::log2(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_reciprocal.cpp b/kernels/portable/cpu/op_reciprocal.cpp index f22f9883858..c1ebd96ae4e 100644 --- 
a/kernels/portable/cpu/op_reciprocal.cpp +++ b/kernels/portable/cpu/op_reciprocal.cpp @@ -14,16 +14,27 @@ namespace executor { namespace native { namespace { +float reciprocal(float x) { + return 1.0f / x; +} + double reciprocal(double x) { return 1.0 / x; } +template < + typename Integer, + std::enable_if_t<std::is_integral_v<Integer>, bool> = true> +double reciprocal(Integer x) { + return reciprocal((double)x); +} } // namespace Tensor& reciprocal_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - reciprocal, ctx, in, out); + static constexpr const char op_name[] = "reciprocal.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>( + [](auto x) { return reciprocal(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_rsqrt.cpp b/kernels/portable/cpu/op_rsqrt.cpp index 19c4c6c1a57..628b72e9f19 100644 --- a/kernels/portable/cpu/op_rsqrt.cpp +++ b/kernels/portable/cpu/op_rsqrt.cpp @@ -12,16 +12,17 @@ namespace torch { namespace executor { namespace native { -namespace { - -double rsqrt(double x) { - return 1.0 / std::sqrt(x); -} - -} // namespace Tensor& rsqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(rsqrt, ctx, in, out); + static constexpr const char op_name[] = "rsqrt.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16<op_name>( + [](auto x) { + auto result = std::sqrt(x); + return static_cast<decltype(result)>(1) / result; + }, + ctx, + in, + out); } } // namespace native diff --git a/kernels/portable/cpu/op_sin.cpp b/kernels/portable/cpu/op_sin.cpp index ad65c4be18b..85d43abd62f 100644 --- a/kernels/portable/cpu/op_sin.cpp +++ b/kernels/portable/cpu/op_sin.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& sin_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::sin, ctx, in, out); + static constexpr const char op_name[] = "sin.out"; 
+ return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::sin(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_sinh.cpp b/kernels/portable/cpu/op_sinh.cpp index 21666392392..9e75e66ea56 100644 --- a/kernels/portable/cpu/op_sinh.cpp +++ b/kernels/portable/cpu/op_sinh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& sinh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::sinh, ctx, in, out); + static constexpr const char op_name[] = "sinh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::sinh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_sqrt.cpp b/kernels/portable/cpu/op_sqrt.cpp index bd2075f5b04..c80eee203f2 100644 --- a/kernels/portable/cpu/op_sqrt.cpp +++ b/kernels/portable/cpu/op_sqrt.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& sqrt_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::sqrt, ctx, in, out); + static constexpr const char op_name[] = "sqrt.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::sqrt(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_tan.cpp b/kernels/portable/cpu/op_tan.cpp index a2b921d5146..37f241745c5 100644 --- a/kernels/portable/cpu/op_tan.cpp +++ b/kernels/portable/cpu/op_tan.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& tan_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16(std::tan, ctx, in, out); + static constexpr const char op_name[] = "tan.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::tan(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_tanh.cpp 
b/kernels/portable/cpu/op_tanh.cpp index ae9f93dc62c..d0b0597abaa 100644 --- a/kernels/portable/cpu/op_tanh.cpp +++ b/kernels/portable/cpu/op_tanh.cpp @@ -15,8 +15,9 @@ namespace executor { namespace native { Tensor& tanh_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realhbbf16_to_floathbf16( - std::tanh, ctx, in, out); + static constexpr const char op_name[] = "tanh.out"; + return internal::unary_ufunc_realhbbf16_to_floathbf16( + [](auto x) { return std::tanh(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/op_trunc.cpp b/kernels/portable/cpu/op_trunc.cpp index a14a2b18e2f..284099b494b 100644 --- a/kernels/portable/cpu/op_trunc.cpp +++ b/kernels/portable/cpu/op_trunc.cpp @@ -15,7 +15,9 @@ namespace executor { namespace native { Tensor& trunc_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) { - return internal::unary_ufunc_realh(std::trunc, ctx, in, out); + static constexpr const char op_name[] = "trunc.out"; + return internal::unary_ufunc_realh( + [](auto x) { return std::trunc(x); }, ctx, in, out); } } // namespace native diff --git a/kernels/portable/cpu/pattern/pattern.h b/kernels/portable/cpu/pattern/pattern.h index 83d4db92a1e..eae8a746d0e 100644 --- a/kernels/portable/cpu/pattern/pattern.h +++ b/kernels/portable/cpu/pattern/pattern.h @@ -46,6 +46,7 @@ question is a bit more specific, then add a descriptive sufix. */ #pragma once +#include #include namespace torch { @@ -53,29 +54,70 @@ namespace executor { namespace native { namespace internal { +// Implementation detail for the other helpers in this header. Returns +// true on success, false on failure. 
+bool check_and_resize_inputs( + KernelRuntimeContext& ctx, + const Tensor& in, + Tensor& out); + /** * Implements an op pattern for ops that take a single input tensor of any - * realh dtye, no additional arguments, and outputs a tensor of the same size + * realh dtype, no additional arguments, and outputs a tensor of the same size * and dtype. The function fn specifies the math operation which is applied to * the input tensor element-wise. */ +template <const char* op_name, typename Op> Tensor& unary_ufunc_realh( - double (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + ET_KERNEL_CHECK( + ctx, tensors_have_same_shape_and_dtype(in, out), InvalidArgument, out); + + ET_SWITCH_REALH_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&] { + utils::apply_unitensor_elementwise_fn<CTYPE, op_name>( + fn, + ctx, + in, + utils::SupportedTensorDtypes::REALH, + out, + utils::SupportedTensorDtypes::SAME_AS_COMMON); + }); + return out; +} /** * Implements an op pattern for ops that take a single input tensor of any - * realhb dtye (real, half and boolean), no additional arguments, and outputs a + * realhb dtype (real, half and boolean), no additional arguments, and outputs a * boolean tensor of the same size. The function fn specifies the math * operation which is applied to the input tensor element-wise. 
*/ +template <const char* op_name, typename Op> Tensor& unary_ufunc_realhb_to_bool( - bool (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] { + utils::apply_unitensor_elementwise_fn<CTYPE_IN, op_name>( + [fn](const CTYPE_IN val_in) { return fn(val_in); }, + ctx, + in, + utils::SupportedTensorDtypes::REALHBBF16, + out, + utils::SupportedTensorDtypes::BOOL); + }); + + return out; +} /** * Implements an op pattern for ops that take a single input tensor of any @@ -83,11 +125,30 @@ Tensor& unary_ufunc_realhb_to_bool( * outputs a floating point tensor of the same size. The function fn specifies * the math operation which is applied to the input tensor element-wise. */ +template <const char* op_name, typename Op> Tensor& unary_ufunc_realhbbf16_to_floathbf16( - double (*fn)(double), + const Op& fn, KernelRuntimeContext& ctx, const Tensor& in, - Tensor& out); + Tensor& out) { + ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); + + if (!check_and_resize_inputs(ctx, in, out)) { + return out; + } + + ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] { + utils::apply_unitensor_elementwise_fn<CTYPE_IN, op_name>( + [fn](const CTYPE_IN val_in) { return fn(val_in); }, + ctx, + in, + utils::SupportedTensorDtypes::REALHBBF16, + out, + utils::SupportedTensorDtypes::FLOATHBF16); + }); + + return out; +} } // namespace internal } // namespace native diff --git a/kernels/portable/cpu/pattern/targets.bzl b/kernels/portable/cpu/pattern/targets.bzl index 1de3035776e..7330d9e4570 100644 --- a/kernels/portable/cpu/pattern/targets.bzl +++ b/kernels/portable/cpu/pattern/targets.bzl @@ -50,17 +50,17 @@ def define_common_targets(): runtime.cxx_library( name = "pattern", srcs = [ - "unary_ufunc_realhb_to_bool.cpp", - "unary_ufunc_realhbbf16_to_floathbf16.cpp", - "unary_ufunc_realh.cpp", + "pattern.cpp", ], exported_headers = [ "pattern.h", ], compiler_flags = 
["-Wno-missing-prototypes"], + exported_deps = [ + "//executorch/kernels/portable/cpu/util:elementwise_util", + ], deps = [ "//executorch/kernels/portable/cpu/util:broadcast_util", - "//executorch/kernels/portable/cpu/util:functional_util", "//executorch/runtime/kernel:kernel_includes", ], visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/optimized/cpu/..."], diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realh.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realh.cpp deleted file mode 100644 index 16d847ace31..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realh.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realh( - double (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_shape_and_dtype(in, out), InvalidArgument, out); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - ET_SWITCH_REALH_TYPES(in.scalar_type(), ctx, __func__, CTYPE, [&] { - apply_unary_map_fn( - [fn](const CTYPE val_in) { return static_cast(fn(val_in)); }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp deleted file mode 100644 index 
367137ad02c..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_bool.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realhb_to_bool( - bool (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK_MSG( - ctx, - out.scalar_type() == executorch::aten::ScalarType::Bool, - InvalidArgument, - out, - "Expected out tensor to have dtype Bool, but got %" PRId8 " instead.", - static_cast(out.scalar_type())); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - const auto in_type = in.scalar_type(); - - ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, __func__, CTYPE_IN, [&] { - apply_unary_map_fn( - [fn](const CTYPE_IN val_in) { return fn(val_in); }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp deleted file mode 100644 index 602b5b1bfd2..00000000000 --- a/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. 
- * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include - -namespace torch { -namespace executor { -namespace native { -namespace internal { - -Tensor& unary_ufunc_realhbbf16_to_floathbf16( - double (*fn)(double), - KernelRuntimeContext& ctx, - const Tensor& in, - Tensor& out) { - (void)ctx; - - ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out); - - // Resize for dynamic shape - ET_KERNEL_CHECK_MSG( - ctx, - resize_tensor(out, in.sizes()) == Error::Ok, - InvalidArgument, - out, - "Failed to resize output tensor."); - - ET_KERNEL_CHECK( - ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out); - - const auto in_type = in.scalar_type(); - const auto out_type = out.scalar_type(); - - ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, __func__, CTYPE_IN, [&] { - ET_SWITCH_FLOATHBF16_TYPES(out_type, ctx, __func__, CTYPE_OUT, [&] { - apply_unary_map_fn( - [fn](const CTYPE_IN val_in) { - CTYPE_OUT xi = static_cast(val_in); - return static_cast(fn(xi)); - }, - in.const_data_ptr(), - out.mutable_data_ptr(), - in.numel()); - }); - }); - - return out; -} - -} // namespace internal -} // namespace native -} // namespace executor -} // namespace torch diff --git a/kernels/portable/cpu/util/dtype_util.cpp b/kernels/portable/cpu/util/dtype_util.cpp index d240b9f83bc..81b1b203a54 100644 --- a/kernels/portable/cpu/util/dtype_util.cpp +++ b/kernels/portable/cpu/util/dtype_util.cpp @@ -23,10 +23,14 @@ bool check_tensor_dtype( return executorch::runtime::tensor_is_realhbbf16_type(t); case SupportedTensorDtypes::REALHBF16: return executorch::runtime::tensor_is_realhbf16_type(t); + case SupportedTensorDtypes::REALH: + return executorch::runtime::tensor_is_realh_type(t); case SupportedTensorDtypes::FLOATHBF16: return executorch::runtime::tensor_is_floating_type(t); case SupportedTensorDtypes::INTB: return 
executorch::runtime::tensor_is_integral_type(t, true); + case SupportedTensorDtypes::BOOL: + return executorch::runtime::tensor_is_type(t, ScalarType::Bool); case SupportedTensorDtypes::BOOL_OR_BYTE: return (executorch::runtime::tensor_is_type( t, ScalarType::Bool, ScalarType::Byte)); diff --git a/kernels/portable/cpu/util/dtype_util.h b/kernels/portable/cpu/util/dtype_util.h index 1f0e3403e82..ae9e9d31df5 100644 --- a/kernels/portable/cpu/util/dtype_util.h +++ b/kernels/portable/cpu/util/dtype_util.h @@ -51,6 +51,15 @@ load_to_common_fn get_load_to_common_fn_realhbf16( return result; } +template +load_to_common_fn get_load_to_common_fn_realh(const Tensor& t) { + CTYPE_COMMON (*result)(const void*) = nullptr; + ET_SWITCH_REALH_TYPES(t.scalar_type(), unused, op_name, TENSOR_CTYPE, [&]() { + result = internal::load_and_convert; + }); + return result; +} + template load_to_common_fn get_load_to_common_fn_floathbf16( const Tensor& t) { @@ -72,6 +81,16 @@ load_to_common_fn get_load_to_common_fn_intb(const Tensor& t) { return result; } +template +load_to_common_fn get_load_to_common_fn_bool(const Tensor& t) { + ET_CHECK_MSG( + t.scalar_type() == ScalarType::Bool, + "Unhandled dtype %s for %s", + ::executorch::runtime::toString(t.scalar_type()), + op_name); + return internal::load_and_convert; +} + template load_to_common_fn get_load_to_common_fn_bool_or_byte( const Tensor& t) { @@ -137,6 +156,16 @@ store_common_to_tensor_fn get_store_common_to_tensor_fn_realhbf16( return result; } +template +store_common_to_tensor_fn get_store_common_to_tensor_fn_realh( + const Tensor& t) { + void (*result)(CTYPE_COMMON, void*) = nullptr; + ET_SWITCH_REALH_TYPES(t.scalar_type(), unused, op_name, TENSOR_CTYPE, [&]() { + result = internal::convert_and_store; + }); + return result; +} + template store_common_to_tensor_fn get_store_common_to_tensor_fn_floathbf16(const Tensor& t) { @@ -159,6 +188,17 @@ store_common_to_tensor_fn get_store_common_to_tensor_fn_intb( return result; } +template 
+store_common_to_tensor_fn get_store_common_to_tensor_fn_bool( + const Tensor& t) { + ET_CHECK_MSG( + t.scalar_type() == ScalarType::Bool, + "Unhandled dtype %s for %s", + ::executorch::runtime::toString(t.scalar_type()), + op_name); + return internal::convert_and_store; +} + template store_common_to_tensor_fn get_store_common_to_tensor_fn_bool_or_byte(const Tensor& t) { @@ -191,8 +231,10 @@ get_store_common_to_tensor_fn_same_as_common(const Tensor& t) { enum class SupportedTensorDtypes { REALHBBF16, REALHBF16, + REALH, FLOATHBF16, INTB, + BOOL, BOOL_OR_BYTE, SAME_AS_COMPUTE, SAME_AS_COMMON, @@ -209,10 +251,14 @@ load_to_common_fn get_load_to_common_fn( return get_load_to_common_fn_realhbbf16(t); case SupportedTensorDtypes::REALHBF16: return get_load_to_common_fn_realhbf16(t); + case SupportedTensorDtypes::REALH: + return get_load_to_common_fn_realh(t); case SupportedTensorDtypes::FLOATHBF16: return get_load_to_common_fn_realhbf16(t); case SupportedTensorDtypes::INTB: return get_load_to_common_fn_intb(t); + case SupportedTensorDtypes::BOOL: + return get_load_to_common_fn_bool(t); case SupportedTensorDtypes::BOOL_OR_BYTE: return get_load_to_common_fn_bool_or_byte(t); case SupportedTensorDtypes::SAME_AS_COMPUTE: @@ -233,10 +279,14 @@ store_common_to_tensor_fn get_store_common_to_tensor_fn( return get_store_common_to_tensor_fn_realhbbf16(t); case SupportedTensorDtypes::REALHBF16: return get_store_common_to_tensor_fn_realhbf16(t); + case SupportedTensorDtypes::REALH: + return get_store_common_to_tensor_fn_realh(t); case SupportedTensorDtypes::FLOATHBF16: return get_store_common_to_tensor_fn_floathbf16(t); case SupportedTensorDtypes::INTB: return get_store_common_to_tensor_fn_intb(t); + case SupportedTensorDtypes::BOOL: + return get_store_common_to_tensor_fn_bool(t); case SupportedTensorDtypes::BOOL_OR_BYTE: return get_store_common_to_tensor_fn_bool_or_byte( t); diff --git a/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h 
b/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h index 6e49dd9e57b..64a52f47005 100644 --- a/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h +++ b/kernels/test/UnaryUfuncRealHBBF16ToFloatHBF16Test.h @@ -73,19 +73,17 @@ class UnaryUfuncRealHBBF16ToFloatHBF16Test : public OperatorTest { auto expected = tf_out.make({1, 6}, expected_vector); if (IN_DTYPE == ScalarType::BFloat16 || OUT_DTYPE == ScalarType::BFloat16) { double rtol = executorch::runtime::testing::internal::kDefaultRtol; - // It appears we need a higher tolerance for at least some ATen - // tests, like aten_op_acosh_test. - if (get_supported_features()->is_aten) { - rtol = 3e-3; - } + // Raise tolerance because both we and ATen run these + // computations at internal float32 precision rather than + // float64. + rtol = 3e-3; EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, rtol, executorch::runtime::testing::internal::kDefaultBFloat16Atol); } else if (IN_DTYPE == ScalarType::Half || OUT_DTYPE == ScalarType::Half) { double rtol = executorch::runtime::testing::internal::kDefaultRtol; - // It appears we need a higher tolerance for at least some ATen - // tests, like aten_op_acosh_test. - if (get_supported_features()->is_aten) { - rtol = 1e-3; - } + // Raise tolerance because both we and ATen run these + // computations at internal float32 precision rather than + // float64. + rtol = 1e-3; EXPECT_TENSOR_CLOSE_WITH_TOL(out, expected, rtol, executorch::runtime::testing::internal::kDefaultHalfAtol); } else { EXPECT_TENSOR_CLOSE(out, expected);