Fix axis0 calls in reduction Python binding (#1459)

ndgrigorian · web-flow · commit 02e7714c8041 · 2023-10-27T15:52:04.000-07:00
* max and min now use MinMaxAtomicSupportFactory. These functions were using ArithmeticAtomicSupportFactory, which disables atomics for floating point types. * Resolves #1455. This issue was caused by a typo: where the `axis0` kernels for tree and atomic reductions should have been called, the `axis1` kernels were called instead. * Adds tests for the #1455 resolution.
diff --git a/dpctl/tensor/libtensor/source/reductions/reduction_atomic_support.hpp b/dpctl/tensor/libtensor/source/reductions/reduction_atomic_support.hpp
@@ -117,12 +117,12 @@ template <typename fnT, typename T> struct MinMaxAtomicSupportFactory
 };
 
 template <typename fnT, typename T>
-struct MaxAtomicSupportFactory : public ArithmeticAtomicSupportFactory<fnT, T>
+struct MaxAtomicSupportFactory : public MinMaxAtomicSupportFactory<fnT, T>
 {
 };
 
 template <typename fnT, typename T>
-struct MinAtomicSupportFactory : public ArithmeticAtomicSupportFactory<fnT, T>
+struct MinAtomicSupportFactory : public MinMaxAtomicSupportFactory<fnT, T>
 {
 };
 
diff --git a/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp b/dpctl/tensor/libtensor/source/reductions/reduction_over_axis.hpp
@@ -417,10 +417,10 @@ std::pair<sycl::event, sycl::event> py_reduction_over_axis(
                 typename std::remove_all_extents<contig_fnT>::type;
             contig_fn_ptr_T fn;
             if (supports_atomics) {
-                fn = axis1_atomic_dispatch_table[src_typeid][dst_typeid];
+                fn = axis0_atomic_dispatch_table[src_typeid][dst_typeid];
             }
             else {
-                fn = axis1_temps_dispatch_table[src_typeid][dst_typeid];
+                fn = axis0_temps_dispatch_table[src_typeid][dst_typeid];
             }
             if (fn != nullptr) {
                 sycl::event reduction_over_axis0_contig_ev =
@@ -727,7 +727,7 @@ std::pair<sycl::event, sycl::event> py_tree_reduction_over_axis(
             }
         }
         else if (mat_reduce_over_axis0) {
-            auto fn = axis1_temps_dispatch_table[src_typeid][dst_typeid];
+            auto fn = axis0_temps_dispatch_table[src_typeid][dst_typeid];
             if (fn != nullptr) {
                 sycl::event reduction_over_axis0_contig_ev =
                     fn(exec_q, iter_nelems, reduction_nelems, src.get_data(),
@@ -929,7 +929,6 @@ std::pair<sycl::event, sycl::event> py_search_over_axis(
     }
 
     using dpctl::tensor::py_internal::simplify_iteration_space;
-    using dpctl::tensor::py_internal::simplify_iteration_space_1;
 
     auto const &src_shape_vecs = src.get_shape_vector();
     auto const &src_strides_vecs = src.get_strides_vector();
diff --git a/dpctl/tests/test_tensor_sum.py b/dpctl/tests/test_tensor_sum.py
@@ -212,6 +212,36 @@ def test_axis0_bug():
     assert dpt.all(s == expected)
 
 
+def test_sum_axis1_axis0():
+    """See gh-1455"""
+    get_queue_or_skip()
+
+    # The atomic case is checked in `test_usm_ndarray_reductions`
+    # This test checks the tree reduction path for correctness
+    x = dpt.reshape(dpt.arange(3 * 4 * 5, dtype="f4"), (3, 4, 5))
+
+    m = dpt.sum(x, axis=0)
+    expected = dpt.asarray(
+        [
+            [60, 63, 66, 69, 72],
+            [75, 78, 81, 84, 87],
+            [90, 93, 96, 99, 102],
+            [105, 108, 111, 114, 117],
+        ],
+        dtype="f4",
+    )
+    tol = dpt.finfo(m.dtype).resolution
+    assert dpt.allclose(m, expected, atol=tol, rtol=tol)
+
+    x = dpt.flip(x, axis=2)
+    m = dpt.sum(x, axis=2)
+    expected = dpt.asarray(
+        [[10, 35, 60, 85], [110, 135, 160, 185], [210, 235, 260, 285]],
+        dtype="f4",
+    )
+    assert dpt.allclose(m, expected, atol=tol, rtol=tol)
+
+
 def _any_complex(dtypes):
     return any(dpt.isdtype(dpt.dtype(dt), "complex floating") for dt in dtypes)
 
diff --git a/dpctl/tests/test_usm_ndarray_reductions.py b/dpctl/tests/test_usm_ndarray_reductions.py
@@ -61,6 +61,20 @@ def test_max_min_axis():
     assert dpt.all(m == x[:, 0, 0, :, 0])
 
 
+def test_max_axis1_axis0():
+    """See gh-1455"""
+    get_queue_or_skip()
+
+    x = dpt.reshape(dpt.arange(3 * 4 * 5), (3, 4, 5))
+
+    m = dpt.max(x, axis=0)
+    assert dpt.all(m == x[-1, :, :])
+
+    x = dpt.flip(x, axis=2)
+    m = dpt.max(x, axis=2)
+    assert dpt.all(m == x[:, :, 0])
+
+
 def test_reduction_keepdims():
     get_queue_or_skip()
 
@@ -440,3 +454,28 @@ def test_hypot_complex():
     x = dpt.zeros(1, dtype="c8")
     with pytest.raises(TypeError):
         dpt.reduce_hypot(x)
+
+
+def test_tree_reduction_axis1_axis0():
+    """See gh-1455"""
+    get_queue_or_skip()
+
+    x = dpt.reshape(dpt.arange(3 * 4 * 5, dtype="f4"), (3, 4, 5))
+
+    m = dpt.logsumexp(x, axis=0)
+    tol = dpt.finfo(m.dtype).resolution
+    assert_allclose(
+        dpt.asnumpy(m),
+        np.logaddexp.reduce(dpt.asnumpy(x), axis=0, dtype=m.dtype),
+        rtol=tol,
+        atol=tol,
+    )
+
+    x = dpt.flip(x, axis=2)
+    m = dpt.logsumexp(x, axis=2)
+    assert_allclose(
+        dpt.asnumpy(m),
+        np.logaddexp.reduce(dpt.asnumpy(x), axis=2, dtype=m.dtype),
+        rtol=tol,
+        atol=tol,
+    )

Original file line number	Diff line number	Diff line change
`@@ -117,12 +117,12 @@ template <typename fnT, typename T> struct MinMaxAtomicSupportFactory`
`117`	`117`	`};`
`118`	`118`
`119`	`119`	`template <typename fnT, typename T>`
`120`		`-struct MaxAtomicSupportFactory : public ArithmeticAtomicSupportFactory<fnT, T>`
	`120`	`+struct MaxAtomicSupportFactory : public MinMaxAtomicSupportFactory<fnT, T>`
`121`	`121`	`{`
`122`	`122`	`};`
`123`	`123`
`124`	`124`	`template <typename fnT, typename T>`
`125`		`-struct MinAtomicSupportFactory : public ArithmeticAtomicSupportFactory<fnT, T>`
	`125`	`+struct MinAtomicSupportFactory : public MinMaxAtomicSupportFactory<fnT, T>`
`126`	`126`	`{`
`127`	`127`	`};`
`128`	`128`
Original file line number	Diff line number	Diff line change
`@@ -417,10 +417,10 @@ std::pair<sycl::event, sycl::event> py_reduction_over_axis(`
`417`	`417`	`typename std::remove_all_extents<contig_fnT>::type;`
`418`	`418`	`contig_fn_ptr_T fn;`
`419`	`419`	`if (supports_atomics) {`
`420`		`- fn = axis1_atomic_dispatch_table[src_typeid][dst_typeid];`
	`420`	`+ fn = axis0_atomic_dispatch_table[src_typeid][dst_typeid];`
`421`	`421`	`}`
`422`	`422`	`else {`
`423`		`- fn = axis1_temps_dispatch_table[src_typeid][dst_typeid];`
	`423`	`+ fn = axis0_temps_dispatch_table[src_typeid][dst_typeid];`
`424`	`424`	`}`
`425`	`425`	`if (fn != nullptr) {`
`426`	`426`	`sycl::event reduction_over_axis0_contig_ev =`
`@@ -727,7 +727,7 @@ std::pair<sycl::event, sycl::event> py_tree_reduction_over_axis(`
`727`	`727`	`}`
`728`	`728`	`}`
`729`	`729`	`else if (mat_reduce_over_axis0) {`
`730`		`- auto fn = axis1_temps_dispatch_table[src_typeid][dst_typeid];`
	`730`	`+ auto fn = axis0_temps_dispatch_table[src_typeid][dst_typeid];`
`731`	`731`	`if (fn != nullptr) {`
`732`	`732`	`sycl::event reduction_over_axis0_contig_ev =`
`733`	`733`	`fn(exec_q, iter_nelems, reduction_nelems, src.get_data(),`
`@@ -929,7 +929,6 @@ std::pair<sycl::event, sycl::event> py_search_over_axis(`
`929`	`929`	`}`
`930`	`930`
`931`	`931`	`using dpctl::tensor::py_internal::simplify_iteration_space;`
`932`		`- using dpctl::tensor::py_internal::simplify_iteration_space_1;`
`933`	`932`
`934`	`933`	`auto const &src_shape_vecs = src.get_shape_vector();`
`935`	`934`	`auto const &src_strides_vecs = src.get_strides_vector();`