From 80f0463dc266658ccfb00b05dd3207108c198462 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 18 Apr 2023 12:17:57 -0500 Subject: [PATCH] Return events for computational tasks, rather than temp-clean up host tasks This resolves a hang that Anton has uncovered. The gist of the bug is ``` for i in range(op_count): v[i] = dpt.empty_like(a[i], order='F', dtype=v_type) ht_copy_ev[i], copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(src=a[i], dst=v[i], sycl_queue=a_sycl_queue) di._wait(copy_ev) ``` Here `di` is a Pybind11 extension, whose function `_wait` was implemented as ``` m.def("_wait", [](sycl::event e) -> void { e.wait(); }, ""); ``` The script would hang on the second iteration of the loop in the `di._wait` call. The hang disappears after the change in this commit. Our attempts to build a C++ reproducer have not been successful thus far. --- dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp | 3 +-- dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp | 2 +- dpctl/tensor/libtensor/source/copy_for_reshape.cpp | 2 +- dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp | 4 ++-- dpctl/tensor/libtensor/source/triul_ctor.cpp | 5 ++--- dpctl/tensor/libtensor/source/where.cpp | 2 +- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp index ef7847d8c2..168e29acff 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp @@ -988,8 +988,7 @@ std::pair py_nonzero( sycl::event py_obj_management_host_task_ev = dpctl::utils::keep_args_alive( exec_q, {cumsum, indexes}, host_task_events); - return std::make_pair(py_obj_management_host_task_ev, - temporaries_cleanup_ev); + return std::make_pair(py_obj_management_host_task_ev, non_zero_indexes_ev); } } // namespace py_internal diff --git 
a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp index 72272ff356..1edbb52dfb 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp @@ -272,7 +272,7 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, host_task_events.push_back(temporaries_cleanup_ev); return std::make_pair(keep_args_alive(exec_q, {src, dst}, host_task_events), - temporaries_cleanup_ev); + copy_and_cast_generic_ev); } void init_copy_and_cast_usm_to_usm_dispatch_tables(void) diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp index dfd2c85b52..c3ed489bb5 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp @@ -167,7 +167,7 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, host_task_events.push_back(temporaries_cleanup_ev); return std::make_pair(keep_args_alive(exec_q, {src, dst}, host_task_events), - temporaries_cleanup_ev); + copy_for_reshape_event); } void init_copy_for_reshape_dispatch_vectors(void) diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index cd6b2f658b..ee6e741b7a 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -540,7 +540,7 @@ usm_ndarray_take(dpctl::tensor::usm_ndarray src, sycl::event arg_cleanup_ev = keep_args_alive(exec_q, {src, py_ind, dst}, host_task_events); - return std::make_pair(arg_cleanup_ev, temporaries_cleanup_ev); + return std::make_pair(arg_cleanup_ev, take_generic_ev); } std::pair @@ -854,7 +854,7 @@ usm_ndarray_put(dpctl::tensor::usm_ndarray dst, sycl::event arg_cleanup_ev = keep_args_alive(exec_q, {dst, py_ind, val}, host_task_events); - return std::make_pair(arg_cleanup_ev, 
temporaries_cleanup_ev); + return std::make_pair(arg_cleanup_ev, put_generic_ev); } void init_advanced_indexing_dispatch_tables(void) diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index 7c1a0e5cd9..686cc77032 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -202,7 +202,7 @@ usm_ndarray_triul(sycl::queue exec_q, } auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on({tri_ev}); + cgh.depends_on(tri_ev); auto ctx = exec_q.get_context(); cgh.host_task( [shp_host_shape_and_strides, dev_shape_and_strides, ctx]() { @@ -213,8 +213,7 @@ usm_ndarray_triul(sycl::queue exec_q, }); return std::make_pair( - keep_args_alive(exec_q, {src, dst}, {temporaries_cleanup_ev}), - temporaries_cleanup_ev); + keep_args_alive(exec_q, {src, dst}, {temporaries_cleanup_ev}), tri_ev); } void init_triul_ctor_dispatch_vectors(void) diff --git a/dpctl/tensor/libtensor/source/where.cpp b/dpctl/tensor/libtensor/source/where.cpp index 5c9933f537..3122eb31d8 100644 --- a/dpctl/tensor/libtensor/source/where.cpp +++ b/dpctl/tensor/libtensor/source/where.cpp @@ -244,7 +244,7 @@ py_where(dpctl::tensor::usm_ndarray condition, sycl::event arg_cleanup_ev = keep_args_alive(exec_q, {x1, x2, condition, dst}, host_task_events); - return std::make_pair(arg_cleanup_ev, temporaries_cleanup_ev); + return std::make_pair(arg_cleanup_ev, where_ev); } void init_where_dispatch_tables(void)