4
4
from cuda .core .experimental ._device import Device
5
5
from cuda import cuda
6
6
from cuda .core .experimental ._utils import handle_return
7
+ import ctypes
7
8
8
- class DummyMemoryResource (MemoryResource ):
9
- def __init__ (self ):
9
@pytest.fixture(scope='module')
def init_cuda():
    """Module-scoped fixture: create a default ``Device`` and make it current."""
    default_device = Device()
    default_device.set_current()
12
+
13
class DummyDeviceMemoryResource(MemoryResource):
    """Test memory resource that allocates with ``cuMemAlloc``.

    Buffers are reported as device-accessible only, and the reported
    device id is always 0.
    """

    def __init__(self, device):
        # The device is only stored; nothing in this class reads it back.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate ``size`` bytes via ``cuMemAlloc`` and wrap them in a ``Buffer``.

        ``stream`` is accepted but not used.
        """
        device_ptr = handle_return(cuda.cuMemAlloc(size))
        return Buffer(ptr=device_ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Release ``ptr`` with ``cuMemFree``.

        The value returned by the driver call is not inspected.
        """
        cuda.cuMemFree(ptr)

    @property
    def is_device_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always ``False`` for this resource."""
        return False

    @property
    def device_id(self) -> int:
        """Always 0, regardless of the device passed to the constructor."""
        return 0
36
+
37
class DummyHostMemoryResource(MemoryResource):
    """Test memory resource backed by a ``ctypes`` byte array.

    Buffers are reported as host-accessible only. The resource is not tied
    to a GPU, so reading ``device_id`` raises ``RuntimeError``.
    """

    def __init__(self):
        pass

    def allocate(self, size, stream=None) -> Buffer:
        """Create a ``ctypes`` array of ``size`` bytes and wrap it in a ``Buffer``.

        ``stream`` is accepted but not used.
        """
        # The ctypes array object itself is handed to Buffer as `ptr`.
        host_array = (ctypes.c_byte * size)()
        return Buffer(ptr=host_array, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Do nothing.

        The ctypes array is released by Python's normal object reclamation,
        so there is no explicit free to perform here.
        """
        pass

    @property
    def is_device_accessible(self) -> bool:
        """Always ``False`` for this resource."""
        return False

    @property
    def is_host_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def device_id(self) -> int:
        """Raise ``RuntimeError``: this resource has no associated GPU."""
        # The message previously said "pinned", copied from the pinned
        # resource; it now names the resource that actually raised.
        raise RuntimeError("the host memory resource is not bound to any GPU")
61
+
62
+ class DummyUnifiedMemoryResource (MemoryResource ):
63
+ def __init__ (self , device ):
64
+ self .device = device
65
+ pass
66
+
67
+ def allocate (self , size , stream = None ) -> Buffer :
68
+ ptr = handle_return (cuda .cuMemAllocManaged (size , cuda .CUmemAttach_flags .CU_MEM_ATTACH_GLOBAL .value ))
69
+ return Buffer (ptr = ptr , size = size , mr = self )
70
+
71
+ def deallocate (self , ptr , size , stream = None ):
72
+ cuda .cuMemFree (ptr )
18
73
19
74
@property
20
75
def is_device_accessible (self ) -> bool :
@@ -28,44 +83,117 @@ def is_host_accessible(self) -> bool:
28
83
def device_id (self ) -> int :
29
84
return 0
30
85
31
- def test_buffer_initialization ():
32
- dummy_mr = DummyMemoryResource ()
86
class DummyPinnedMemoryResource(MemoryResource):
    """Test memory resource that allocates with ``cuMemAllocHost``.

    Buffers are reported as both device- and host-accessible. Reading
    ``device_id`` raises ``RuntimeError``.
    """

    def __init__(self, device):
        # The device is only stored; nothing in this class reads it back.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate ``size`` bytes via ``cuMemAllocHost`` and wrap them in a ``Buffer``.

        ``stream`` is accepted but not used.
        """
        host_ptr = handle_return(cuda.cuMemAllocHost(size))
        return Buffer(ptr=host_ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Release ``ptr`` with ``cuMemFreeHost``.

        The value returned by the driver call is not inspected.
        """
        cuda.cuMemFreeHost(ptr)

    @property
    def is_device_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def device_id(self) -> int:
        """Raise ``RuntimeError``: this resource has no associated GPU."""
        raise RuntimeError("the pinned memory resource is not bound to any GPU")
109
+
110
def buffer_initialization(dummy_mr: MemoryResource):
    """Allocate a 1024-byte buffer from ``dummy_mr`` and check its attributes.

    Verifies a non-zero handle, the requested size, the owning memory
    resource, and that the buffer's accessibility flags equal those of
    ``dummy_mr``. The allocation is then handed back to ``dummy_mr.deallocate``.
    """
    requested = 1024
    buf = dummy_mr.allocate(size=requested)

    assert buf.handle != 0
    assert buf.size == requested
    assert buf.memory_resource == dummy_mr

    # The buffer is expected to mirror the flags of the resource it came from.
    assert buf.is_device_accessible == dummy_mr.is_device_accessible
    assert buf.is_host_accessible == dummy_mr.is_host_accessible

    dummy_mr.deallocate(buf.handle, buf.size)
41
118
42
- def test_buffer_copy_to ():
43
- dummy_mr = DummyMemoryResource ()
44
- src_buffer = dummy_mr .allocate (size = 1024 )
45
- dst_buffer = dummy_mr .allocate (size = 1024 )
119
def test_buffer_initialization():
    """Run ``buffer_initialization`` against each dummy memory resource."""
    dev = Device()
    dev.set_current()

    # Same order as before: device, host, unified, pinned.
    resources = (
        DummyDeviceMemoryResource(dev),
        DummyHostMemoryResource(),
        DummyUnifiedMemoryResource(dev),
        DummyPinnedMemoryResource(dev),
    )
    for mr in resources:
        buffer_initialization(mr)
126
+
127
def buffer_copy_to(dummy_mr: MemoryResource, device: Device, check=False, size=1024):
    """Copy one buffer into another with ``Buffer.copy_to`` and optionally verify it.

    Args:
        dummy_mr: Memory resource used to allocate the source and destination.
        device: Device that provides the stream and is synchronized after the copy.
        check: When true, fill the source through a ``ctypes`` pointer before
            the copy and compare the destination against it afterwards. The
            buffer handles are dereferenced on the host, so this should only
            be enabled for resources whose memory the host can read.
        size: Number of bytes to allocate and copy (defaults to 1024).
    """
    src_buffer = dummy_mr.allocate(size=size)
    dst_buffer = dummy_mr.allocate(size=size)
    try:
        stream = device.create_stream()

        if check:
            src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            for i in range(size):
                # ctypes integer types do no overflow checking, so values
                # above 127 wrap instead of raising.
                src_ptr[i] = ctypes.c_byte(i)

        src_buffer.copy_to(dst_buffer, stream=stream)
        device.sync()

        if check:
            dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            # Compare every byte that was written, not just a short prefix.
            assert dst_ptr[:size] == src_ptr[:size]
    finally:
        # Hand the allocations back even when the copy or the check fails.
        dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
        dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)
53
148
54
- def test_buffer_copy_from ():
55
- dummy_mr = DummyMemoryResource ()
56
- src_buffer = dummy_mr .allocate (size = 1024 )
57
- dst_buffer = dummy_mr .allocate (size = 1024 )
149
def test_buffer_copy_to():
    """Run ``buffer_copy_to`` for the device, unified and pinned resources.

    Only the pinned resource is run with content verification enabled.
    """
    dev = Device()
    dev.set_current()

    cases = (
        (DummyDeviceMemoryResource(dev), False),
        (DummyUnifiedMemoryResource(dev), False),
        (DummyPinnedMemoryResource(dev), True),
    )
    for mr, verify in cases:
        buffer_copy_to(mr, dev, check=verify)
155
+
156
def buffer_copy_from(dummy_mr: MemoryResource, device, check=False, size=1024):
    """Fill one buffer from another with ``Buffer.copy_from`` and optionally verify it.

    Args:
        dummy_mr: Memory resource used to allocate the source and destination.
        device: Device that provides the stream and is synchronized after the copy.
        check: When true, fill the source through a ``ctypes`` pointer before
            the copy and compare the destination against it afterwards. The
            buffer handles are dereferenced on the host, so this should only
            be enabled for resources whose memory the host can read.
        size: Number of bytes to allocate and copy (defaults to 1024).
    """
    src_buffer = dummy_mr.allocate(size=size)
    dst_buffer = dummy_mr.allocate(size=size)
    try:
        stream = device.create_stream()

        if check:
            src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            for i in range(size):
                # ctypes integer types do no overflow checking, so values
                # above 127 wrap instead of raising.
                src_ptr[i] = ctypes.c_byte(i)

        dst_buffer.copy_from(src_buffer, stream=stream)
        device.sync()

        if check:
            dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            # Compare every byte that was written, not just a short prefix.
            assert dst_ptr[:size] == src_ptr[:size]
    finally:
        # Hand the allocations back even when the copy or the check fails.
        dummy_mr.deallocate(src_buffer.handle, src_buffer.size)
        dummy_mr.deallocate(dst_buffer.handle, dst_buffer.size)
65
177
66
- def test_buffer_close ():
67
- dummy_mr = DummyMemoryResource ()
178
def test_buffer_copy_from():
    """Run ``buffer_copy_from`` for the device, unified and pinned resources.

    Only the pinned resource is run with content verification enabled.
    """
    dev = Device()
    dev.set_current()

    cases = (
        (DummyDeviceMemoryResource(dev), False),
        (DummyUnifiedMemoryResource(dev), False),
        (DummyPinnedMemoryResource(dev), True),
    )
    for mr, verify in cases:
        buffer_copy_from(mr, dev, check=verify)
184
+
185
def buffer_close(dummy_mr: MemoryResource):
    """Allocate a 1024-byte buffer from ``dummy_mr``, close it, and check it was reset.

    After ``close()`` the buffer is expected to report a handle of 0 and no
    memory resource.
    """
    buffer = dummy_mr.allocate(size=1024)
    buffer.close()
    assert buffer.handle == 0
    # Identity check: `None` is a singleton, and `==` could be overridden
    # by the memory resource type.
    assert buffer.memory_resource is None
190
+
191
def test_buffer_close():
    """Run ``buffer_close`` against each dummy memory resource."""
    dev = Device()
    dev.set_current()

    # Same order as before: device, host, unified, pinned.
    resources = (
        DummyDeviceMemoryResource(dev),
        DummyHostMemoryResource(),
        DummyUnifiedMemoryResource(dev),
        DummyPinnedMemoryResource(dev),
    )
    for mr in resources:
        buffer_close(mr)
198
+
199
# Run the copy test only when this file is executed directly as a script.
# Without the guard, merely importing the module (for example during test
# collection) would launch the test as an import-time side effect.
if __name__ == "__main__":
    test_buffer_copy_to()
0 commit comments