Skip to content

Commit 13907ac

Browse files
committed
push to remote effectively
1 parent f933fe2 commit 13907ac

File tree

4 files changed

+177
-26
lines changed

4 files changed

+177
-26
lines changed

cuda_core/tests/notes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
how to query the devices available on the system
2+
how to copy memory from device to host and vice versa; the copy_to and copy_from methods take buffers, which aren't exposed

cuda_core/tests/test_device.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
from cuda.core.experimental._device import Device
33
from cuda.core.experimental._utils import handle_return, ComputeCapability, CUDAError, \
44
precondition
5+
import pytest
6+
7+
@pytest.fixture(scope='module')
8+
def init_cuda():
9+
Device().set_current()
510

611
def test_device_initialization():
712
device = Device()
@@ -11,6 +16,15 @@ def test_device_repr():
1116
device = Device()
1217
assert str(device).startswith('<Device 0')
1318

19+
def test_device_alloc():
20+
device = Device()
21+
device.set_current()
22+
buffer = device.allocate(1024)
23+
device.sync()
24+
assert buffer.handle != 0
25+
assert buffer.size == 1024
26+
assert buffer.device_id == 0
27+
1428
def test_device_set_current():
1529
device = Device()
1630
device.set_current()
@@ -20,14 +34,13 @@ def test_device_create_stream():
2034
stream = device.create_stream()
2135
assert stream is not None
2236

23-
2437
def test_pci_bus_id():
25-
device = Device(0)
38+
device = Device()
2639
bus_id = handle_return(cudart.cudaDeviceGetPCIBusId(13, device.device_id))
2740
assert device.pci_bus_id == bus_id[:12].decode()
2841

2942
def test_uuid():
30-
device = Device(0)
43+
device = Device()
3144
driver_ver = handle_return(cuda.cuDriverGetVersion())
3245
if driver_ver >= 11040:
3346
uuid = handle_return(cuda.cuDeviceGetUuid_v2(device.device_id))
@@ -38,16 +51,16 @@ def test_uuid():
3851
assert device.uuid == expected_uuid
3952

4053
def test_name():
41-
device = Device(0)
54+
device = Device()
4255
name = handle_return(cuda.cuDeviceGetName(128, device.device_id))
4356
name = name.split(b'\0')[0]
4457
assert device.name == name.decode()
4558

4659
def test_compute_capability():
47-
device = Device(0)
60+
device = Device()
4861
major = handle_return(cudart.cudaDeviceGetAttribute(
4962
cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, device.device_id))
5063
minor = handle_return(cudart.cudaDeviceGetAttribute(
5164
cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, device.device_id))
5265
expected_cc = ComputeCapability(major, minor)
53-
assert device.compute_capability == expected_cc
66+
assert device.compute_capability == expected_cc

cuda_core/tests/test_memory.py

Lines changed: 148 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,72 @@
44
from cuda.core.experimental._device import Device
55
from cuda import cuda
66
from cuda.core.experimental._utils import handle_return
7+
import ctypes
78

8-
class DummyMemoryResource(MemoryResource):
9-
def __init__(self):
9+
@pytest.fixture(scope='module')
10+
def init_cuda():
11+
Device().set_current()
12+
13+
class DummyDeviceMemoryResource(MemoryResource):
14+
def __init__(self, device):
15+
self.device = device
1016
pass
1117

1218
def allocate(self, size, stream=None) -> Buffer:
1319
ptr = handle_return(cuda.cuMemAlloc(size))
1420
return Buffer(ptr=ptr, size=size, mr=self)
1521

1622
def deallocate(self, ptr, size, stream=None):
17-
handle_return(cuda.cuMemFree(ptr))
23+
cuda.cuMemFree(ptr)
24+
25+
@property
26+
def is_device_accessible(self) -> bool:
27+
return True
28+
29+
@property
30+
def is_host_accessible(self) -> bool:
31+
return False
32+
33+
@property
34+
def device_id(self) -> int:
35+
return 0
36+
37+
class DummyHostMemoryResource(MemoryResource):
38+
def __init__(self):
39+
pass
40+
41+
def allocate(self, size, stream=None) -> Buffer:
42+
# Allocate a ctypes buffer of size `size`
43+
ptr = (ctypes.c_byte * size)()
44+
return Buffer(ptr=ptr, size=size, mr=self)
45+
46+
def deallocate(self, ptr, size, stream=None):
47+
#the memory is deallocated per the ctypes deallocation at garbage collection time
48+
pass
49+
50+
@property
51+
def is_device_accessible(self) -> bool:
52+
return False
53+
54+
@property
55+
def is_host_accessible(self) -> bool:
56+
return True
57+
58+
@property
59+
def device_id(self) -> int:
60+
raise RuntimeError("the pinned memory resource is not bound to any GPU")
61+
62+
class DummyUnifiedMemoryResource(MemoryResource):
63+
def __init__(self, device):
64+
self.device = device
65+
pass
66+
67+
def allocate(self, size, stream=None) -> Buffer:
68+
ptr = handle_return(cuda.cuMemAllocManaged(size, cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value))
69+
return Buffer(ptr=ptr, size=size, mr=self)
70+
71+
def deallocate(self, ptr, size, stream=None):
72+
cuda.cuMemFree(ptr)
1873

1974
@property
2075
def is_device_accessible(self) -> bool:
@@ -28,44 +83,117 @@ def is_host_accessible(self) -> bool:
2883
def device_id(self) -> int:
2984
return 0
3085

31-
def test_buffer_initialization():
32-
dummy_mr = DummyMemoryResource()
86+
class DummyPinnedMemoryResource(MemoryResource):
87+
def __init__(self, device):
88+
self.device = device
89+
pass
90+
91+
def allocate(self, size, stream=None) -> Buffer:
92+
ptr = handle_return(cuda.cuMemAllocHost(size))
93+
return Buffer(ptr=ptr, size=size, mr=self)
94+
95+
def deallocate(self, ptr, size, stream=None):
96+
cuda.cuMemFreeHost(ptr)
97+
98+
@property
99+
def is_device_accessible(self) -> bool:
100+
return True
101+
102+
@property
103+
def is_host_accessible(self) -> bool:
104+
return True
105+
106+
@property
107+
def device_id(self) -> int:
108+
raise RuntimeError("the pinned memory resource is not bound to any GPU")
109+
110+
def buffer_initialization(dummy_mr : MemoryResource):
33111
buffer = dummy_mr.allocate(size=1024)
34112
assert buffer.handle != 0
35113
assert buffer.size == 1024
36114
assert buffer.memory_resource == dummy_mr
37-
assert buffer.is_device_accessible == True
38-
assert buffer.is_host_accessible == True
39-
assert buffer.device_id == 0
115+
assert buffer.is_device_accessible == dummy_mr.is_device_accessible
116+
assert buffer.is_host_accessible == dummy_mr.is_host_accessible
40117
dummy_mr.deallocate(buffer.handle, buffer.size)
41118

42-
def test_buffer_copy_to():
43-
dummy_mr = DummyMemoryResource()
44-
src_buffer = dummy_mr.allocate(size=1024)
45-
dst_buffer = dummy_mr.allocate(size=1024)
119+
def test_buffer_initialization():
46120
device = Device()
47121
device.set_current()
122+
buffer_initialization(DummyDeviceMemoryResource(device))
123+
buffer_initialization(DummyHostMemoryResource())
124+
buffer_initialization(DummyUnifiedMemoryResource(device))
125+
buffer_initialization(DummyPinnedMemoryResource(device))
126+
127+
def buffer_copy_to(dummy_mr : MemoryResource, device : Device, check = False):
128+
src_buffer = dummy_mr.allocate(size=1024)
129+
dst_buffer = dummy_mr.allocate(size=1024)
48130
stream = device.create_stream()
131+
132+
if check:
133+
src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
134+
for i in range(1024):
135+
src_ptr[i] = ctypes.c_byte(i)
136+
49137
src_buffer.copy_to(dst_buffer, stream=stream)
50-
# Assuming cuMemcpyAsync is correctly called, we can't directly check the result here
138+
device.sync()
139+
140+
if check:
141+
dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
142+
143+
for i in range(10):
144+
assert dst_ptr[i] == src_ptr[i]
145+
51146
dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
52147
dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)
53148

54-
def test_buffer_copy_from():
55-
dummy_mr = DummyMemoryResource()
56-
src_buffer = dummy_mr.allocate(size=1024)
57-
dst_buffer = dummy_mr.allocate(size=1024)
149+
def test_buffer_copy_to():
58150
device = Device()
59151
device.set_current()
152+
buffer_copy_to(DummyDeviceMemoryResource(device), device)
153+
buffer_copy_to(DummyUnifiedMemoryResource(device), device)
154+
buffer_copy_to(DummyPinnedMemoryResource(device), device, check = True)
155+
156+
def buffer_copy_from(dummy_mr : MemoryResource, device, check = False):
157+
src_buffer = dummy_mr.allocate(size=1024)
158+
dst_buffer = dummy_mr.allocate(size=1024)
60159
stream = device.create_stream()
160+
161+
if check:
162+
src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
163+
for i in range(1024):
164+
src_ptr[i] = ctypes.c_byte(i)
165+
61166
dst_buffer.copy_from(src_buffer, stream=stream)
62-
# Assuming cuMemcpyAsync is correctly called, we can't directly check the result here
167+
device.sync()
168+
169+
if check:
170+
dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
171+
172+
for i in range(10):
173+
assert dst_ptr[i] == src_ptr[i]
174+
63175
dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
64176
dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)
65177

66-
def test_buffer_close():
67-
dummy_mr = DummyMemoryResource()
178+
def test_buffer_copy_from():
179+
device = Device()
180+
device.set_current()
181+
buffer_copy_from(DummyDeviceMemoryResource(device), device)
182+
buffer_copy_from(DummyUnifiedMemoryResource(device), device)
183+
buffer_copy_from(DummyPinnedMemoryResource(device), device, check = True)
184+
185+
def buffer_close(dummy_mr : MemoryResource):
68186
buffer = dummy_mr.allocate(size=1024)
69187
buffer.close()
70188
assert buffer.handle == 0
71189
assert buffer.memory_resource == None
190+
191+
def test_buffer_close():
192+
device = Device()
193+
device.set_current()
194+
buffer_close(DummyDeviceMemoryResource(device))
195+
buffer_close(DummyHostMemoryResource())
196+
buffer_close(DummyUnifiedMemoryResource(device))
197+
buffer_close(DummyPinnedMemoryResource(device))
198+
199+
test_buffer_copy_to()

cuda_core/tests/test_program.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
from cuda import nvrtc
33
from cuda.core.experimental._program import Program
44
from cuda.core.experimental._module import ObjectCode, Kernel
5+
from cuda.core.experimental._device import Device
6+
7+
@pytest.fixture(scope='module')
8+
def init_cuda():
9+
Device().set_current()
510

611
def test_program_init_valid_code_type():
712
code = "extern \"C\" __global__ void my_kernel() {}"
@@ -48,3 +53,6 @@ def test_program_close():
4853
program = Program(code, "c++")
4954
program.close()
5055
assert program.handle is None
56+
57+
Device().set_current()
58+
test_program_compile_valid_target_type()

0 commit comments

Comments
 (0)