Skip to content

Commit 8782dfa

Browse files
committed
NEW: Make event timing error messages more specific and actionable
The CUDA driver provides different error messages for various errors when trying to compute elapsed time, and the documentation explains each of these scenarios. Surface each of these to Python users with actionable error messages.
1 parent 37b401a commit 8782dfa

File tree

2 files changed

+81
-3
lines changed

2 files changed

+81
-3
lines changed

cuda_core/cuda/core/experimental/_event.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,27 @@ def __sub__(self, other):
120120
try:
121121
timing = handle_return(driver.cuEventElapsedTime(other.handle, self.handle))
122122
except CUDAError as e:
123-
raise RuntimeError(
124-
"Timing capability must be enabled in order to subtract two Events; timing is disabled by default."
125-
) from e
123+
error_message = str(e)
124+
if "CUDA_ERROR_INVALID_HANDLE" in error_message:
125+
if self.is_timing_disabled or other.is_timing_disabled:
126+
explanation = (
127+
"Both Events must be created with timing enabled in order to subtract them; "
128+
"use EventOptions(enable_timing=True) when creating both events."
129+
)
130+
else:
131+
explanation = (
132+
"Both Events must be recorded before they can be subtracted; "
133+
"use Stream.record() to record both events to a stream."
134+
)
135+
elif "CUDA_ERROR_NOT_READY" in error_message:
136+
explanation = (
137+
"One or both events have not completed; "
138+
"use Event.sync(), Stream.sync(), or Device.sync() to wait for the events to complete "
139+
"before subtracting them."
140+
)
141+
else:
142+
raise e
143+
raise RuntimeError(explanation) from e
126144
return timing
127145

128146
@property

cuda_core/tests/test_event.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,63 @@ def test_is_done(init_cuda):
8080
# Without a sync, the captured work might not have yet completed
8181
# Therefore this check should never raise an exception
8282
assert event.is_done in (True, False)
83+
84+
85+
def test_error_timing_disabled():
    """Check the error raised when subtracting events that lack timing support.

    The subtraction must fail with the "created with timing enabled" message
    when at least one operand was recorded with ``enable_timing=False``, and
    must succeed when both operands were recorded with ``enable_timing=True``.
    """
    device = Device()
    device.set_current()
    enabled = EventOptions(enable_timing=True)
    disabled = EventOptions(enable_timing=False)
    stream = device.create_stream()

    # Each pair is (options for the first recorded event, options for the second).
    failing_combinations = ((enabled, disabled), (disabled, disabled))
    for first_options, second_options in failing_combinations:
        start = stream.record(options=first_options)
        stop = stream.record(options=second_options)
        stream.sync()
        with pytest.raises(RuntimeError, match="^Both Events must be created with timing enabled"):
            stop - start

    # With timing enabled on both events the subtraction must not raise.
    start = stream.record(options=enabled)
    stop = stream.record(options=enabled)
    stream.sync()
    stop - start
108+
109+
110+
def test_error_timing_recorded():
    """Check the error raised when subtracting events that were never recorded."""
    device = Device()
    device.set_current()
    enabled = EventOptions(enable_timing=True)
    stream = device.create_stream()

    recorded = stream.record(options=enabled)
    # These two are only created via the device; this test never records them.
    unrecorded_a = device.create_event(options=enabled)
    unrecorded_b = device.create_event(options=enabled)

    stream.sync()

    # Each pair is (left operand, right operand) of the subtraction; every pair
    # contains at least one event that was never recorded.
    operand_pairs = (
        (unrecorded_a, recorded),
        (recorded, unrecorded_a),
        (unrecorded_b, unrecorded_a),
    )
    for left, right in operand_pairs:
        with pytest.raises(RuntimeError, match="^Both Events must be recorded"):
            left - right
127+
128+
129+
def test_error_timing_incomplete():
    """Check the error raised when subtracting events that have not completed.

    Records one event, makes the stream wait on an event that is never
    recorded, records a further event behind that wait, and expects the
    subtraction of the two recorded events to raise ``RuntimeError`` with the
    "have not completed" message.
    """
    device = Device()
    device.set_current()
    enabled = EventOptions(enable_timing=True)
    stream = device.create_stream()

    event1 = stream.record(options=enabled)
    event2 = device.create_event(options=enabled)
    stream.wait(event2)
    event3 = stream.record(options=enabled)

    # Intent: event3 is still pending because the stream is waiting on event2,
    # which is never recorded.
    # NOTE(review): this relies on Stream.wait() actually blocking on a
    # never-recorded event. If the underlying driver call treats an unrecorded
    # event as already complete, event3 may finish and this test becomes racy;
    # confirm against the CUDA driver documentation.
    #
    # The pattern pins the literal ';' that follows "completed" in the error
    # message rather than a bare '.', which is a regex wildcard.
    with pytest.raises(RuntimeError, match="^One or both events have not completed;"):
        event3 - event1

0 commit comments

Comments
 (0)