@@ -64,16 +64,46 @@ static sycl::event div_impl(sycl::queue exec_q,
64
64
{
65
65
type_utils::validate_type_for_device<T>(exec_q);
66
66
67
- const T* a = reinterpret_cast <const T*>(in_a);
68
- const T* b = reinterpret_cast <const T*>(in_b);
69
- T* y = reinterpret_cast <T*>(out_y);
67
+ std::cerr << " enter div_impl" << std::endl;
70
68
71
- return mkl_vm::div (exec_q,
69
+ const T* _a = reinterpret_cast <const T*>(in_a);
70
+ const T* _b = reinterpret_cast <const T*>(in_b);
71
+ T* _y = reinterpret_cast <T*>(out_y);
72
+
73
+ std::cerr << " casting is done" << std::endl;
74
+
75
+ T* a = sycl::malloc_device<T>(n, exec_q);
76
+ T* b = sycl::malloc_device<T>(n, exec_q);
77
+ T* y = sycl::malloc_device<T>(n, exec_q);
78
+
79
+ std::cerr << " malloc is done" << std::endl;
80
+
81
+ exec_q.copy (_a, a, n).wait ();
82
+ exec_q.copy (_b, b, n).wait ();
83
+ exec_q.copy (_y, y, n).wait ();
84
+
85
+ std::cerr << " copy is done" << std::endl;
86
+
87
+ sycl::event ev = mkl_vm::div (exec_q,
72
88
n, // number of elements to be calculated
73
89
a, // pointer `a` containing 1st input vector of size n
74
90
b, // pointer `b` containing 2nd input vector of size n
75
91
y, // pointer `y` to the output vector of size n
76
92
depends);
93
+ ev.wait ();
94
+
95
+ std::cerr << " div is done" << std::endl;
96
+
97
+ exec_q.copy (y, _y, n).wait ();
98
+
99
+ std::cerr << " copy is done" << std::endl;
100
+
101
+ sycl::free (a, exec_q);
102
+ sycl::free (b, exec_q);
103
+ sycl::free (y, exec_q);
104
+
105
+ std::cerr << " leaving div_impl" << std::endl;
106
+ return sycl::event ();
77
107
}
78
108
79
109
std::pair<sycl::event, sycl::event> div (sycl::queue exec_q,
@@ -175,9 +205,21 @@ std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
175
205
throw py::value_error (" No div implementation defined" );
176
206
}
177
207
sycl::event sum_ev = div_fn (exec_q, src_nelems, src1_data, src2_data, dst_data, depends);
178
-
179
- sycl::event ht_ev = dpctl::utils::keep_args_alive (exec_q, {src1, src2, dst}, {sum_ev});
180
- return std::make_pair (ht_ev, sum_ev);
208
+ // sum_ev.wait();
209
+
210
+ // int* dummy = sycl::malloc_device<int>(1, exec_q);
211
+ // sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) {
212
+ // // cgh.depends_on(sum_ev);
213
+ // auto ctx = exec_q.get_context();
214
+ // cgh.host_task([dummy, ctx]() {
215
+ // // dummy host task to pass into keep_args_alive
216
+ // sycl::free(dummy, ctx);
217
+ // });
218
+ // });
219
+
220
+ // sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
221
+ // return std::make_pair(ht_ev, sum_ev);
222
+ return std::make_pair (sycl::event (), sycl::event ());
181
223
}
182
224
183
225
bool can_call_div (sycl::queue exec_q,
0 commit comments