diff --git a/cuda_core/cuda/core/experimental/_context.py b/cuda_core/cuda/core/experimental/_context.pyx
similarity index 56%
rename from cuda_core/cuda/core/experimental/_context.py
rename to cuda_core/cuda/core/experimental/_context.pyx
index 24e06d69c..205f6c983 100644
--- a/cuda_core/cuda/core/experimental/_context.py
+++ b/cuda_core/cuda/core/experimental/_context.pyx
@@ -13,16 +13,21 @@ class ContextOptions:
     pass  # TODO
 
 
-class Context:
-    __slots__ = ("_handle", "_id")
+cdef class Context:
 
-    def __new__(self, *args, **kwargs):
+    cdef:
+        object _handle
+        int _device_id
+
+    def __init__(self, *args, **kwargs):
         raise RuntimeError("Context objects cannot be instantiated directly. Please use Device or Stream APIs.")
 
     @classmethod
-    def _from_ctx(cls, obj, dev_id):
-        assert_type(obj, driver.CUcontext)
-        ctx = super().__new__(cls)
-        ctx._handle = obj
-        ctx._id = dev_id
+    def _from_ctx(cls, handle: driver.CUcontext, int device_id):
+        cdef Context ctx = Context.__new__(Context)
+        ctx._handle = handle
+        ctx._device_id = device_id
         return ctx
+
+    def __eq__(self, other):
+        return int(self._handle) == int(other._handle)
diff --git a/cuda_core/cuda/core/experimental/_device.py b/cuda_core/cuda/core/experimental/_device.py
index c9a786070..c89f659a9 100644
--- a/cuda_core/cuda/core/experimental/_device.py
+++ b/cuda_core/cuda/core/experimental/_device.py
@@ -1237,7 +1237,6 @@ def create_stream(self, obj: Optional[IsStreamT] = None, options: StreamOptions
         """
         return Stream._init(obj=obj, options=options)
 
-    @precondition(_check_context_initialized)
     def create_event(self, options: Optional[EventOptions] = None) -> Event:
         """Create an Event object without recording it to a Stream.
 
@@ -1256,7 +1255,10 @@ def create_event(self, options: Optional[EventOptions] = None) -> Event:
             Newly created event object.
 
         """
-        return Event._init(self._id, self.context._handle, options)
+        ctx = driver.cuCtxGetCurrent()[1]
+        if int(ctx) == 0:
+            raise CUDAError("No context is bound to the calling CPU thread.")
+        return Event._init(self._id, ctx, options)
 
     @precondition(_check_context_initialized)
     def allocate(self, size, stream: Optional[Stream] = None) -> Buffer:
diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.pyx
similarity index 82%
rename from cuda_core/cuda/core/experimental/_event.py
rename to cuda_core/cuda/core/experimental/_event.pyx
index 800f34c9a..1c1302a9b 100644
--- a/cuda_core/cuda/core/experimental/_event.py
+++ b/cuda_core/cuda/core/experimental/_event.pyx
@@ -4,14 +4,12 @@
 
 from __future__ import annotations
 
-import weakref
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Optional
 
 from cuda.core.experimental._context import Context
 from cuda.core.experimental._utils.cuda_utils import (
     CUDAError,
-    check_or_create_options,
     driver,
     handle_return,
 )
@@ -25,7 +23,7 @@
 
 
 @dataclass
-class EventOptions:
+cdef class EventOptions:
     """Customizable :obj:`~_event.Event` options.
 
     Attributes
@@ -49,7 +47,27 @@ class EventOptions:
     support_ipc: Optional[bool] = False
 
 
-class Event:
+cdef inline EventOptions check_or_create_options(options, str options_description):
+    """Create the specified options dataclass from a dictionary of options or None."""
+    cdef EventOptions opts
+    if options is None:
+        opts = EventOptions()
+    elif isinstance(options, dict):
+        opts = EventOptions(**options)
+    elif isinstance(options, EventOptions):
+        opts = options
+    else:
+        raise TypeError(
+            f"The {options_description} must be provided as an object "
+            f"of type {EventOptions.__name__} or as a dict with valid {options_description}. "
+            f"The provided object is '{options}'."
+        )
+
+    return opts
+
+
+
+cdef class Event:
     """Represent a record at a specific point of execution within a CUDA stream.
 
     Applications can asynchronously record events at any point in
@@ -77,30 +95,20 @@ class Event:
     and they should instead be created through a :obj:`~_stream.Stream` object.
 
     """
-
-    class _MembersNeededForFinalize:
-        __slots__ = ("handle",)
-
-        def __init__(self, event_obj, handle):
-            self.handle = handle
-            weakref.finalize(event_obj, self.close)
-
-        def close(self):
-            if self.handle is not None:
-                handle_return(driver.cuEventDestroy(self.handle))
-                self.handle = None
-
-    def __new__(self, *args, **kwargs):
+    cdef:
+        object _handle
+        bint _timing_disabled
+        bint _busy_waited
+        int _device_id
+        object _ctx_handle
+
+    def __init__(self, *args, **kwargs):
         raise RuntimeError("Event objects cannot be instantiated directly. Please use Stream APIs (record).")
 
-    __slots__ = ("__weakref__", "_mnff", "_timing_disabled", "_busy_waited", "_device_id", "_ctx_handle")
-
     @classmethod
-    def _init(cls, device_id: int, ctx_handle: Context, options: Optional[EventOptions] = None):
-        self = super().__new__(cls)
-        self._mnff = Event._MembersNeededForFinalize(self, None)
-
-        options = check_or_create_options(EventOptions, options, "Event options")
+    def _init(cls, device_id: int, ctx_handle: Context, opts=None):
+        cdef Event self = Event.__new__(Event)
+        cdef EventOptions options = check_or_create_options(opts, "Event options")
         flags = 0x0
         self._timing_disabled = False
         self._busy_waited = False
@@ -112,14 +120,22 @@ def _init(cls, device_id: int, ctx_handle: Context, options: Optional[EventOptio
             self._busy_waited = True
         if options.support_ipc:
             raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/103")
-        self._mnff.handle = handle_return(driver.cuEventCreate(flags))
+        self._handle = handle_return(driver.cuEventCreate(flags))
         self._device_id = device_id
         self._ctx_handle = ctx_handle
         return self
 
+    cdef _close(self):
+        if self._handle is not None:
+            _ = driver.cuEventDestroy(self._handle)
+            self._handle = None
+
     def close(self):
         """Destroy the event."""
-        self._mnff.close()
+        self._close()
+
+    def __dealloc__(self):
+        self._close()
 
     def __isub__(self, other):
         return NotImplemented
@@ -129,7 +145,7 @@ def __rsub__(self, other):
 
     def __sub__(self, other):
         # return self - other (in milliseconds)
-        err, timing = driver.cuEventElapsedTime(other.handle, self.handle)
+        err, timing = driver.cuEventElapsedTime(other.handle, self._handle)
         try:
             raise_if_driver_error(err)
             return timing
@@ -180,12 +196,12 @@ def sync(self):
         has been completed.
         """
-        handle_return(driver.cuEventSynchronize(self._mnff.handle))
+        handle_return(driver.cuEventSynchronize(self._handle))
 
     @property
     def is_done(self) -> bool:
         """Return True if all captured works have been completed, otherwise False."""
-        (result,) = driver.cuEventQuery(self._mnff.handle)
+        (result,) = driver.cuEventQuery(self._handle)
         if result == driver.CUresult.CUDA_SUCCESS:
             return True
         if result == driver.CUresult.CUDA_ERROR_NOT_READY:
             return False
@@ -201,7 +217,7 @@ def handle(self) -> cuda.bindings.driver.CUevent:
             This handle is a Python object.
             To get the memory address of the underlying C handle, call ``int(Event.handle)``.
         """
-        return self._mnff.handle
+        return self._handle
 
     @property
     def device(self) -> Device:
diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.py b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
similarity index 96%
rename from cuda_core/cuda/core/experimental/_utils/cuda_utils.py
rename to cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
index 48b48d2fb..77ce533e6 100644
--- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.py
+++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
@@ -52,7 +52,7 @@ def _reduce_3_tuple(t: tuple):
     return t[0] * t[1] * t[2]
 
 
-def _check_driver_error(error):
+cpdef inline void _check_driver_error(error) except*:
     if error == driver.CUresult.CUDA_SUCCESS:
         return
     name_err, name = driver.cuGetErrorName(error)
@@ -69,7 +69,7 @@ def _check_driver_error(error):
     raise CUDAError(f"{name}: {desc}")
 
 
-def _check_runtime_error(error):
+cpdef inline void _check_runtime_error(error) except*:
     if error == runtime.cudaError_t.cudaSuccess:
         return
     name_err, name = runtime.cudaGetErrorName(error)
@@ -86,7 +86,7 @@ def _check_runtime_error(error):
     raise CUDAError(f"{name}: {desc}")
 
 
-def _check_error(error, handle=None):
+cdef inline void _check_error(error, handle=None) except*:
     if isinstance(error, driver.CUresult):
         _check_driver_error(error)
     elif isinstance(error, runtime.cudaError_t):
@@ -105,7 +105,7 @@ def _check_error(error, handle=None):
     raise RuntimeError(f"Unknown error type: {error}")
 
 
-def handle_return(result, handle=None):
+def handle_return(tuple result, handle=None):
     _check_error(result[0], handle=handle)
     if len(result) == 1:
         return
diff --git a/cuda_core/setup.py b/cuda_core/setup.py
index f2005c3dd..f2b84bfaf 100644
--- a/cuda_core/setup.py
+++ b/cuda_core/setup.py
@@ -2,28 +2,28 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import glob
 import os
 
 from Cython.Build import cythonize
 from setuptools import Extension, setup
 from setuptools.command.build_ext import build_ext as _build_ext
 
-ext_modules = (
-    Extension(
-        "cuda.core.experimental._dlpack",
-        sources=["cuda/core/experimental/_dlpack.pyx"],
-        language="c++",
-    ),
-    Extension(
-        "cuda.core.experimental._memoryview",
-        sources=["cuda/core/experimental/_memoryview.pyx"],
-        language="c++",
-    ),
+
+# It seems setuptools' wildcard support has problems for namespace packages,
+# so we explicitly spell out all Extension instances.
+root_module = "cuda.core.experimental"
+root_path = f"{os.path.sep}".join(root_module.split(".")) + os.path.sep
+ext_files = glob.glob(f"{root_path}/**/*.pyx", recursive=True)
+
+
+def strip_prefix_suffix(filename):
+    return filename[len(root_path):-4]
+
+
+module_names = (strip_prefix_suffix(f) for f in ext_files)
+ext_modules = tuple(
     Extension(
-        "cuda.core.experimental._kernel_arg_handler",
-        sources=["cuda/core/experimental/_kernel_arg_handler.pyx"],
+        f"cuda.core.experimental.{mod.replace(os.path.sep, '.')}",
+        sources=[f"cuda/core/experimental/{mod}.pyx"],
         language="c++",
-    ),
+    )
+    for mod in module_names
 )
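
Usage sketch (illustrative, not part of the patch): the cythonized Event keeps the same Python-facing behavior, so event timing through the public cuda.core.experimental API should still read as below. The device/stream setup and the elided GPU workload are assumptions made for the example, not code from this diff.

    from cuda.core.experimental import Device, EventOptions

    dev = Device()
    dev.set_current()
    stream = dev.create_stream()

    # Timing must be enabled; otherwise Event.__sub__ (which wraps
    # cuEventElapsedTime, see the _event.pyx hunk above) cannot report a result.
    opts = EventOptions(enable_timing=True)
    e_start = stream.record(options=opts)
    # ... enqueue GPU work on `stream` here ...
    e_stop = stream.record(options=opts)
    e_stop.sync()
    print(f"elapsed: {e_stop - e_start:.3f} ms")  # __sub__ returns milliseconds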