|
15 | 15 | # limitations under the License.
|
16 | 16 |
|
17 | 17 |
|
18 |
| -import dpnp |
19 | 18 | import numpy as np
|
20 | 19 |
|
21 | 20 | import dpctl
|
22 | 21 | import dpctl.tensor as dpt
|
23 | 22 | from dpctl import SyclTimer
|
24 | 23 |
|
25 |
| -n = 4000 |
| 24 | + |
def matmul(m1, m2):
    """Naive matrix multiplication built from broadcasting and a reduction.

    Forms the full three-dimensional array of element-wise products
    ``m1[i, k] * m2[k, j]`` and then sums it over ``k``. The intermediate
    holds ``m1.shape[0] * m2.shape[1] * m1.shape[1]`` elements, so this is
    only practical for small matrices.

    Args:
        m1: Two-dimensional array, the left operand (presumably a
            ``dpctl.tensor`` array, as the ``dpt`` helpers are applied to it).
        m2: Two-dimensional array, the right operand; its first extent must
            equal ``m1.shape[1]``.

    Returns:
        The sum over the last axis of the broadcast product array, i.e. a
        two-dimensional array indexed ``[i, j]``.

    Raises:
        ValueError: If either operand is not two-dimensional, or if the
            inner dimensions do not agree.
    """
    # Validate with explicit raises rather than ``assert``: assertions are
    # stripped under ``python -O``, and without these checks broadcasting
    # could silently accept mismatched operands (e.g. an inner extent of 1).
    if m1.ndim != 2:
        raise ValueError(f"m1 must be two-dimensional, got ndim={m1.ndim}")
    if m2.ndim != 2:
        raise ValueError(f"m2 must be two-dimensional, got ndim={m2.ndim}")
    if m1.shape[1] != m2.shape[0]:
        raise ValueError(
            "Inner dimensions do not match: "
            f"m1.shape[1]={m1.shape[1]}, m2.shape[0]={m2.shape[0]}"
        )
    # Insert a broadcast axis so m1 is indexed [i, 1, k].
    m1 = m1[:, dpt.newaxis, :]
    # Transpose m2 and insert a broadcast axis so it is indexed [1, j, k].
    m2 = dpt.permute_dims(m2, (1, 0))[dpt.newaxis, :, :]
    # form m_prods[i, j, k] = m1[i, k] * m2[k, j]
    m_prods = m1 * m2
    # sum over k
    return dpt.sum(m_prods, axis=-1)
| 36 | + |
| 37 | + |
| 38 | +n = 500 |
26 | 39 |
|
27 | 40 | try:
|
28 | 41 | q = dpctl.SyclQueue(property="enable_profiling")
|
|
33 | 46 | )
|
34 | 47 | exit(0)
|
35 | 48 |
|
36 |
| -a = dpt.reshape(dpt.arange(n * n, dtype=np.float32, sycl_queue=q), (n, n)) |
37 |
| -b = dpt.reshape( |
38 |
| - dpt.asarray(np.random.random(n * n), dtype=np.float32, sycl_queue=q), (n, n) |
39 |
| -) |
| 49 | +a_flat = dpt.arange(n * n, dtype=dpt.float32, sycl_queue=q) |
| 50 | +a = dpt.reshape(a_flat, (n, n)) |
40 | 51 |
|
41 |
| -timer = SyclTimer(time_scale=1) |
| 52 | +b_rand = np.random.random(n * n).astype(np.float32) |
| 53 | +b_flat = dpt.asarray(b_rand, dtype=dpt.float32, sycl_queue=q) |
| 54 | +b = dpt.reshape(b_flat, (n, n)) |
42 | 55 |
|
43 | 56 | wall_times = []
|
44 | 57 | device_times = []
|
| 58 | + |
45 | 59 | print(
|
46 |
| - f"Performing matrix multiplication of two {n} by {n} matrices " |
| 60 | + f"Computing naive matrix multiplication of two {n} by {n} matrices " |
47 | 61 | f"on {q.sycl_device.name}, repeating 5 times."
|
48 | 62 | )
|
| 63 | +print() |
49 | 64 | for _ in range(5):
|
| 65 | + timer = SyclTimer(time_scale=1) |
50 | 66 | with timer(q):
|
51 |
| - a_matmul_b = dpnp.matmul(a, b) |
| 67 | + a_matmul_b = matmul(a, b) |
52 | 68 | host_time, device_time = timer.dt
|
53 | 69 | wall_times.append(host_time)
|
54 | 70 | device_times.append(device_time)
|
55 | 71 |
|
56 |
| -c = dpnp.asnumpy(a_matmul_b) |
57 |
| -cc = np.dot(dpnp.asnumpy(a), dpnp.asnumpy(b)) |
| 72 | +c = dpt.asnumpy(a_matmul_b) |
| 73 | +cc = np.dot(dpt.asnumpy(a), dpt.asnumpy(b)) |
58 | 74 |
|
59 | 75 | print("Wall time: ", wall_times, "\nDevice time: ", device_times)
|
| 76 | +print() |
60 | 77 | print(
|
61 | 78 | "Accuracy test: passed."
|
62 | 79 | if np.allclose(c, cc)
|
63 |
| - else (f"Accuracy test: failed. Discrepancy {np.max(np.abs(c-cc))}") |
| 80 | + else (f"Accuracy test: FAILED. \n Discrepancy = {np.max(np.abs(c-cc))}") |
64 | 81 | )
|
0 commit comments