diff --git a/.gitignore b/.gitignore
index 59cf74c..451461d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,12 @@
 .idea/
 __pycache__/
 build/
+dist/
+imgui.ini
 *.egg-info/
 .*/
 *.so
 *.html
 *.cpp
 *.c
-.*
\ No newline at end of file
+.*
diff --git a/examples/cuda/dynamic_geometry.cu b/examples/cuda/dynamic_geometry.cu
new file mode 100644
index 0000000..4a7936c
--- /dev/null
+++ b/examples/cuda/dynamic_geometry.cu
@@ -0,0 +1,146 @@
+//
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include <optix.h>
+
+#include "dynamic_geometry.h"
+#include "vec_math.h"
+#include "helpers.h"
+
+extern "C" {
+    __constant__ Params params;  // launch parameters in constant memory, read by the programs below
+}
+
+
+// Traces a single ray through `handle`. The three components of *prd travel with the
+// ray as payload values 0..2: they are encoded with float_as_int before optixTrace and
+// decoded with int_as_float afterwards, so *prd reflects any change made by a hit or miss program.
+static __forceinline__ __device__ void trace(
+    OptixTraversableHandle handle,
+    float3                 ray_origin,
+    float3                 ray_direction,
+    float                  tmin,
+    float                  tmax,
+    float3*                prd
+)
+{
+    // Seed the payload with the caller's current value.
+    unsigned int reg0 = float_as_int( prd->x );
+    unsigned int reg1 = float_as_int( prd->y );
+    unsigned int reg2 = float_as_int( prd->z );
+    optixTrace(
+        handle, ray_origin, ray_direction, tmin, tmax,
+        0.0f,                      // rayTime
+        OptixVisibilityMask( 1 ),
+        OPTIX_RAY_FLAG_NONE,
+        0,                         // SBT offset
+        0,                         // SBT stride
+        0,                         // missSBTIndex
+        reg0, reg1, reg2 );
+    // Copy the (possibly modified) payload back to the caller.
+    prd->x = int_as_float( reg0 );
+    prd->y = int_as_float( reg1 );
+    prd->z = int_as_float( reg2 );
+}
+
+
+static __forceinline__ __device__ void setPayload( float3 p )  // store p in payload values 0..2
+{
+    optixSetPayload_0( float_as_int( p.x ) );  // each component is encoded with float_as_int
+    optixSetPayload_1( float_as_int( p.y ) );
+    optixSetPayload_2( float_as_int( p.z ) );
+}
+
+
+// Returns payload values 0..2 of the current ray, decoded with int_as_float, as a float3.
+static __forceinline__ __device__ float3 getPayload()
+{
+    const float x = int_as_float( optixGetPayload_0() );
+    const float y = int_as_float( optixGetPayload_1() );
+    const float z = int_as_float( optixGetPayload_2() );
+    return make_float3( x, y, z );
+}
+
+
+// Ray generation program. For each launch index it builds a camera ray from the eye point
+// and the U/V/W frame in params, traces it, and stores the resulting color in
+// params.frame_buffer.
+extern "C" __global__ void __raygen__rg()
+{
+    const uint3 launch_idx = optixGetLaunchIndex();
+    const uint3 launch_dim = optixGetLaunchDimensions();
+
+    // Divide the index by the launch size and remap the quotient q with 2*q - 1.
+    const float sx = 2.0f * ( static_cast< float >( launch_idx.x ) / static_cast< float >( launch_dim.x ) ) - 1.0f;
+    const float sy = 2.0f * ( static_cast< float >( launch_idx.y ) / static_cast< float >( launch_dim.y ) ) - 1.0f;
+    const float3 ray_dir = normalize( sx * params.U + sy * params.V + params.W );
+
+    // Initial payload; the miss and closest-hit programs replace it via setPayload.
+    float3 color = make_float3( 0.5f, 0.5f, 0.5f );
+    trace( params.trav_handle,
+        params.eye,
+        ray_dir,
+        0.00f,  // tmin
+        1e16f,  // tmax
+        &color );
+
+    // One pixel per launch index, rows of params.width pixels.
+    const unsigned int pixel = launch_idx.y * params.width + launch_idx.x;
+    params.frame_buffer[pixel] = make_color( color );
+}
+
+
+// Miss program: writes the x, y, z of the SBT record's bg_color into the ray payload (w is dropped).
+extern "C" __global__ void __miss__ms()
+{
+    const MissData* rt_data = reinterpret_cast< const MissData* >( optixGetSbtDataPointer() );
+    setPayload( make_float3( rt_data->bg_color.x, rt_data->bg_color.y, rt_data->bg_color.z ) );
+}
+
+
+// Closest-hit program: colors the hit with the triangle's geometric normal, remapped from
+// [-1, 1] to [0, 1] per component and multiplied by the SBT record color.
+extern "C" __global__ void __closesthit__ch()
+{
+    const HitGroupData* rt_data = reinterpret_cast< const HitGroupData* >( optixGetSbtDataPointer() );
+
+    // Fetch the current vertices of the hit triangle.
+    float3 vtx[3];
+    optixGetTriangleVertexData( optixGetGASTraversableHandle(), optixGetPrimitiveIndex(), optixGetSbtGASIndex(),
+        optixGetRayTime(), vtx );
+
+    // Unnormalized normal: cross product of the two edges leaving vertex 0.
+    const float3 e1 = vtx[1] - vtx[0];
+    const float3 e2 = vtx[2] - vtx[0];
+    const float3 n  = make_float3( e1.y*e2.z - e1.z*e2.y, e1.z*e2.x - e1.x*e2.z, e1.x*e2.y - e1.y*e2.x );
+
+    // s rescales n to length 0.5, so n*s + 0.5 lies in [0, 1] per component.
+    const float s = 0.5f / sqrtf( n.x*n.x + n.y*n.y + n.z*n.z );
+    // 0.5f rather than 0.5 keeps the constant in single precision.
+    setPayload( ( n*s + make_float3( 0.5f ) ) * rt_data->color );
+}
diff --git a/examples/cuda/dynamic_geometry.h b/examples/cuda/dynamic_geometry.h
new file mode 100644
index 0000000..3e799a3
--- /dev/null
+++ b/examples/cuda/dynamic_geometry.h
@@ -0,0 +1,56 @@
+//
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//
+
+struct Params
+{
+    uchar4*                frame_buffer;
+    unsigned int           width;
+    unsigned int           height;
+    float3                 eye, U, V, W;
+    OptixTraversableHandle trav_handle;
+    int                    subframe_index;
+};
+
+struct RayGenData  // NOTE(review): not referenced by dynamic_geometry.cu, which reads the camera from Params
+{
+    float3 cam_eye;
+    float3 camera_u, camera_v, camera_w;
+};
+
+
+struct MissData  // SBT record data read by __miss__ms
+{
+    float4 bg_color;  // background color; only x, y, z reach the payload
+};
+
+
+struct HitGroupData  // SBT record data read by __closesthit__ch
+{
+    float3 color;  // multiplied component-wise into the normal-derived color
+};
diff --git a/examples/cuda/dynamic_geometry_vertex_generation.cu b/examples/cuda/dynamic_geometry_vertex_generation.cu
new file mode 100644
index 0000000..5b1de7e
--- /dev/null
+++ b/examples/cuda/dynamic_geometry_vertex_generation.cu
@@ -0,0 +1,108 @@
+//
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include "vec_math.h"
+
+
+enum struct AnimationMode: int  // selects how generate_vertices animates the sphere
+{
+    NONE = 0,     // vertices are written unchanged
+    DEFORM = 1,   // deform_vertex rescales each vertex's y coordinate over time
+    EXPLODE = 2,  // write_animated_triangle offsets each triangle as a whole
+};
+
+// Triangle wave: +amplitude at x == shift, cycle length period * amplitude. fabsf folds the phase because fmodf keeps a negative sign (x < shift used to overshoot +amplitude).
+__forceinline__ __device__ float triangle_wave( float x, float shift = 0.f, float period = 2.f * M_PIf, float amplitude = 1.f )
+{
+    return fabsf( fmodf( fabsf( ( 4.f / period ) * ( x - shift ) ), 4.f * amplitude ) - 2.f * amplitude ) - amplitude;
+}
+
+// Stores triangle `tidx` (v0, v1, v2) at out_vertices[3 * tidx .. 3 * tidx + 2]. In EXPLODE
+// mode the three vertices share one offset computed from the triangle index and `time`.
+__forceinline__ __device__ void write_animated_triangle( float3* out_vertices, int tidx, float3 v0, float3 v1, float3 v2, AnimationMode mode, float time )
+{
+    float3 offset = make_float3( 0 );
+    if( mode == AnimationMode::EXPLODE )
+    {
+        // Two angles that depend only on the triangle index select the offset; `time` scales it.
+        const float theta = ( (float)M_PIf * ( ( tidx + 1 ) * ( 13 / M_PIf ) ) );
+        const float phi   = ( (float)( 2.0 * M_PIf ) * ( ( tidx + 1 ) * ( 97 / M_PIf ) ) );
+        const float w     = triangle_wave( theta, M_PIf / 2.f );
+        offset = make_float3( triangle_wave( phi ) * w, triangle_wave( phi, M_PIf / 2.f ) * w, triangle_wave( theta ) )
+            * triangle_wave( time, M_PIf / 2.f ) * 2.f;
+    }
+
+    float3* const dst = out_vertices + tidx * 3;
+    dst[0] = v0 + offset;
+    dst[1] = v1 + offset;
+    dst[2] = v2 + offset;
+}
+
+// DEFORM mode scales c.y by 0.5 + 0.4 * cos( 4 * ( c.x + time ) ); every other mode returns c unchanged.
+__forceinline__ __device__ float3 deform_vertex( const float3& c, AnimationMode mode, float time )
+{
+    if( mode != AnimationMode::DEFORM )
+        return c;
+    return make_float3( c.x, c.y * ( 0.5f + 0.4f * cosf( 4 * ( c.x + time ) ) ), c.z );
+}
+
+// One thread per sphere patch (patch = row * width + col, guarded against width * height).
+// Each patch spans one step in theta and phi and is written as two unindexed triangles.
+extern "C" __global__ void generate_vertices(float3* out_vertices, AnimationMode mode, float time, int width, int height)
+{
+    const int patch = blockIdx.x * blockDim.x + threadIdx.x;
+    if( patch >= width * height )
+        return;  // thread beyond the last patch
+
+    const int col = patch % width;
+    const int row = patch / width;
+
+    // theta runs from 0 to pi over `height` rows, phi from 0 to 2*pi over `width` columns.
+    const float theta0 = ( ( float )M_PIf * ( row + 0 ) ) / height;
+    const float theta1 = ( ( float )M_PIf * ( row + 1 ) ) / height;
+    const float phi0 = ( ( float )( 2.0 * M_PIf ) * ( col + 0 ) ) / width;
+    const float phi1 = ( ( float )( 2.0 * M_PIf ) * ( col + 1 ) ) / width;
+
+    const float cos_t0 = cosf( theta0 );
+    const float sin_t0 = sinf( theta0 );
+    const float cos_t1 = cosf( theta1 );
+    const float sin_t1 = sinf( theta1 );
+    const float cos_p0 = cosf( phi0 );
+    const float sin_p0 = sinf( phi0 );
+    const float cos_p1 = cosf( phi1 );
+    const float sin_p1 = sinf( phi1 );
+
+    // Patch corners on the unit sphere, named v<theta index><phi index>, each run through deform_vertex.
+    const float3 v00 = deform_vertex( make_float3( cos_p0 * sin_t0, sin_p0 * sin_t0, cos_t0 ), mode, time );
+    const float3 v10 = deform_vertex( make_float3( cos_p0 * sin_t1, sin_p0 * sin_t1, cos_t1 ), mode, time );
+    const float3 v01 = deform_vertex( make_float3( cos_p1 * sin_t0, sin_p1 * sin_t0, cos_t0 ), mode, time );
+    const float3 v11 = deform_vertex( make_float3( cos_p1 * sin_t1, sin_p1 * sin_t1, cos_t1 ), mode, time );
+
+    write_animated_triangle( out_vertices, patch * 2 + 0, v00, v10, v11, mode, time );
+    write_animated_triangle( out_vertices, patch * 2 + 1, v00, v11, v01, mode, time );
+}
diff --git a/examples/cuda/dynamic_materials.cu b/examples/cuda/dynamic_materials.cu
new file mode 100644
index 0000000..1822a18
--- /dev/null
+++ b/examples/cuda/dynamic_materials.cu
@@ -0,0 +1,178 @@
+//
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+#include <optix.h>
+
+#include "dynamic_materials.h"
+#include "helpers.h"
+#include "vec_math.h"
+
+extern "C" __constant__ Params params;  // launch parameters in constant memory, read by the programs below
+
+// Traces one ray; *prd goes into payload values 0..2 (float_as_int) and is read back afterwards (int_as_float).
+static __forceinline__ __device__ void trace( OptixTraversableHandle handle, float3 ray_origin, float3 ray_direction, float tmin, float tmax, float3* prd )
+{
+    unsigned int slot0 = float_as_int( prd->x );
+    unsigned int slot1 = float_as_int( prd->y );
+    unsigned int slot2 = float_as_int( prd->z );
+    optixTrace( handle, ray_origin, ray_direction, tmin, tmax,
+                0.0f,  // rayTime
+                OptixVisibilityMask( 1 ), OPTIX_RAY_FLAG_NONE,
+                0,     // SBT offset
+                0,     // SBT stride
+                0,     // missSBTIndex
+                slot0, slot1, slot2 );
+    prd->x = int_as_float( slot0 );
+    prd->y = int_as_float( slot1 );
+    prd->z = int_as_float( slot2 );
+}
+
+
+static __forceinline__ __device__ void setPayload( float3 p )  // store p in payload values 0..2
+{
+    optixSetPayload_0( float_as_int( p.x ) );  // each component is encoded with float_as_int
+    optixSetPayload_1( float_as_int( p.y ) );
+    optixSetPayload_2( float_as_int( p.z ) );
+}
+
+
+static __forceinline__ __device__ float3 getPayload()  // payload values 0..2 decoded into a float3
+{
+    return make_float3( int_as_float( optixGetPayload_0() ),
+                        int_as_float( optixGetPayload_1() ),
+                        int_as_float( optixGetPayload_2() ) );
+}
+
+
+static __forceinline__ __device__ float3 getShadingNormal()  // attributes 0..2, filled by __intersection__is
+{
+    return make_float3( int_as_float( optixGetAttribute_0() ),
+                        int_as_float( optixGetAttribute_1() ),
+                        int_as_float( optixGetAttribute_2() ) );
+}
+
+
+// Ray generation program: builds one camera ray per launch index from params.cam_eye and
+// the camera_u/v/w frame, traces it and writes the resulting color to params.image.
+extern "C" __global__ void __raygen__rg()
+{
+    const uint3 launch_idx = optixGetLaunchIndex();
+    const uint3 launch_dim = optixGetLaunchDimensions();
+
+    // Divide the index by the launch size and remap the quotient q with 2*q - 1.
+    const float sx = 2.0f * ( static_cast<float>( launch_idx.x ) / static_cast<float>( launch_dim.x ) ) - 1.0f;
+    const float sy = 2.0f * ( static_cast<float>( launch_idx.y ) / static_cast<float>( launch_dim.y ) ) - 1.0f;
+    const float3 ray_dir = normalize( sx * params.camera_u + sy * params.camera_v + params.camera_w );
+
+    // Initial payload; the miss and closest-hit programs replace it via setPayload.
+    float3 color = make_float3( 0.5f, 0.5f, 0.5f );
+    trace( params.trav_handle, params.cam_eye, ray_dir,
+           0.00f,  // tmin
+           1e16f,  // tmax
+           &color );
+
+    params.image[launch_idx.y * params.image_width + launch_idx.x] = make_color( color );
+}
+
+
+// Miss program: writes the SBT record's color into the ray payload.
+extern "C" __global__ void __miss__ms()
+{
+    const MissData* missData = reinterpret_cast<const MissData*>( optixGetSbtDataPointer() );
+    setPayload( missData->color );
+}
+
+
+// Intersection program: ray/sphere test against a sphere of radius params.radius centred at
+// the object-space origin. Only the nearer root is reported; the surface normal at that
+// point is handed to the hit programs through attributes 0..2.
+extern "C" __global__ void __intersection__is()
+{
+    const float3 orig = optixGetObjectRayOrigin();
+    const float3 dir  = optixGetObjectRayDirection();
+
+    // Solve with a unit-length direction D so the quadratic's leading coefficient is 1;
+    // l converts distances along D back to the parameter of the unnormalized ray.
+    const float3 center = {0.f, 0.f, 0.f};
+    const float3 O      = orig - center;
+    const float  l      = 1 / length( dir );
+    const float3 D      = dir * l;
+
+    const float b    = dot( O, D );
+    const float c    = dot( O, O ) - params.radius * params.radius;
+    const float disc = b * b - c;
+    if( disc > 0.0f )
+    {
+        const float  dist           = -b - sqrtf( disc );  // distance along D to the near root
+        const float3 shading_normal = ( O + dist * D ) / params.radius;
+
+        // orig + t * dir must be the hit point, so t = dist * l (equal to dist when dir has unit length).
+        optixReportIntersection( dist * l,  // t hit
+                                 0,         // user hit kind
+                                 float_as_int( shading_normal.x ),
+                                 float_as_int( shading_normal.y ),
+                                 float_as_int( shading_normal.z ) );
+    }
+}
+
+
+// Shared closest-hit shading: the world-space shading normal is remapped with n * 0.5 + 0.5 and multiplied by baseColor.
+__forceinline__ __device__ void closesthit_impl( float3 baseColor )
+{
+    const float3 world_n = normalize( optixTransformNormalFromObjectToWorldSpace( getShadingNormal() ) );
+    const float3 shaded  = world_n * 0.5f + 0.5f;
+    // add material effects
+    setPayload( shaded * baseColor );
+}
+
+// Closest-hit variant that takes its base color from the hit group's SBT record.
+extern "C" __global__ void __closesthit__ch()
+{
+    closesthit_impl( reinterpret_cast<HitGroupData*>( optixGetSbtDataPointer() )->color );
+}
+
+// Closest-hit variant that outputs the remapped world-space shading normal without a base color.
+extern "C" __global__ void __closesthit__normal()
+{
+    setPayload( normalize( optixTransformNormalFromObjectToWorldSpace( getShadingNormal() ) ) * 0.5f + 0.5f );
+}
+
+extern "C" __global__ void __closesthit__red()
+{
+    closesthit_impl( make_float3( 1.f, 0.f, 0.f ) );  // fixed base color: keeps only the red channel
+}
+
+extern "C" __global__ void __closesthit__green()
+{
+    closesthit_impl( make_float3( 0.f, 1.f, 0.f ) );  // fixed base color: keeps only the green channel
+}
+
+extern "C" __global__ void __closesthit__blue()
+{
+    closesthit_impl( make_float3( 0.f, 0.f, 1.f ) );  // fixed base color: keeps only the blue channel
+}
diff --git a/examples/cuda/dynamic_materials.h b/examples/cuda/dynamic_materials.h
new file mode 100644
index 0000000..8fc62ae
--- /dev/null
+++ b/examples/cuda/dynamic_materials.h
@@ -0,0 +1,51 @@
+//
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+struct Params
+{
+    OptixTraversableHandle trav_handle;
+    uchar4*                image;
+    unsigned int           image_width;
+    unsigned int           image_height;
+    float                  radius;
+    float3                 cam_eye;
+    float3                 camera_u, camera_v, camera_w;
+};
+
+
+struct MissData  // SBT record data read by __miss__ms
+{
+    float3 color;  // written to the payload unchanged when a ray misses
+};
+
+
+struct HitGroupData  // SBT record data read by __closesthit__ch
+{
+    float3       color;          // base color passed to closesthit_impl
+    unsigned int geometryIndex;  // not read by the programs in dynamic_materials.cu
+};
diff --git a/examples/cuda/triangle.h b/examples/cuda/triangle.h
index 2ba482d..1d1a0ce 100644
--- a/examples/cuda/triangle.h
+++ b/examples/cuda/triangle.h
@@ -28,7 +28,6 @@
 
 #pragma once
 
-
 struct Params
 {
     uchar4*                image;
diff --git a/examples/dynamic_geometry.py b/examples/dynamic_geometry.py
new file mode 100644
index 0000000..0507183
--- /dev/null
+++ b/examples/dynamic_geometry.py
@@ -0,0 +1,478 @@
+import os, sys, enum, logging, collections
+
+import cupy as cp
+import numpy as np
+import optix as ox
+
+import glfw, imgui
+
+from optix.sutil.gui import init_ui, display_stats
+from optix.sutil.gl_display import GLDisplay
+from optix.sutil.trackball import Trackball, TrackballViewMode
+from optix.sutil.cuda_output_buffer import CudaOutputBuffer, CudaOutputBufferType, BufferImageFormat
+
+script_dir = os.path.dirname(os.path.abspath(__file__))  # directory of this script; the CUDA sources are read from its 'cuda' subfolder
+
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+log = logging.getLogger()  # root logger, wrapped by ox.Logger in create_context
+
+DEBUG=False
+
+#------------------------------------------------------------------------------
+# Local types
+#------------------------------------------------------------------------------
+
+class Params:
+    _params = collections.OrderedDict([
+            ('frame_buffer',   'u8'),
+            ('width',          'u4'),
+            ('height',         'u4'),
+            ('eye',            '3f4'),
+            ('u',              '3f4'),
+            ('v',              '3f4'),
+            ('w',              '3f4'),
+            ('trav_handle',    'u8'),
+            ('subframe_index', 'i4'),
+        ])
+
+    def __init__(self):
+        self.handle = ox.LaunchParamsRecord(names=tuple(self._params.keys()),
+                                            formats=tuple(self._params.values()))
+
+    def __getattribute__(self, name):
+        if name in Params._params.keys():
+            return self.__dict__['handle'][name]
+        else:
+            return super().__getattribute__(name)
+
+    def __setattr__(self, name, value):
+        if name in Params._params.keys():
+            self.handle[name] = value
+        elif name in {'handle'}:
+            super().__setattr__(name, value)
+        else:
+            raise AttributeError(name)
+
+    def __str__(self):
+        return '\n'.join(f'{k}:  {self.handle[k]}' for k in self._params)
+
+
+class DynamicGeometryState:
+    __slots__ = ['params', 'time', 'ctx', 'module', 'pipeline', 'pipeline_opts',
+            'raygen_grp', 'miss_grp', 'hit_grp', 'sbt',
+            'generate_vertices_kernel', 'd_temp_vertices', 'last_exploding_sphere_rebuild_time',
+            'gas_build_input', 'static_gas', 'deforming_gas', 'exploding_gas',
+            'ias_build_input', 'ias',
+            'trackball', 'camera_changed', 'mouse_button', 'resize_dirty', 'minimized']
+
+    def __init__(self):
+        for slot in self.__slots__:
+            setattr(self, slot, None)
+        self.params = Params()
+
+        self.trackball = Trackball()
+        self.camera_changed = True
+        self.mouse_button = -1
+        self.resize_dirty = False
+        self.minimized = False
+
+    @property
+    def camera(self):
+        return self.trackball.camera
+
+    @property
+    def launch_dimensions(self):
+        return (int(self.params.width), int(self.params.height))
+
+
+class AnimationMode(enum.Enum):  # values match the AnimationMode enum in dynamic_geometry_vertex_generation.cu
+    NONE = 0
+    DEFORM = 1
+    EXPLODE = 2
+
+
+#------------------------------------------------------------------------------
+# Scene data
+#------------------------------------------------------------------------------
+g_tessellation_resolution = 128  # sphere patches per axis (passed as both width and height to the vertex kernel)
+g_exploding_gas_rebuild_frequency = 10.0  # full rebuilds of the exploding GAS per unit of state.time
+
+g_diffuse_colors = np.asarray([  # one RGB row per instance; INST_COUNT is derived from its length
+    [0.7, 0.7, 0.7],
+    [0.8, 0.8, 0.8],
+    [0.9, 0.9, 0.9],
+    [1.0, 1.0, 1.0],
+], dtype=np.float32)
+
+INST_COUNT = g_diffuse_colors.shape[0]
+
+g_instances = np.asarray([  # one 3x4 row-major transform per instance; only the last entry of row 0 differs
+    [1, 0, 0, -4.5,
+     0, 1, 0, 0,
+     0, 0, 1, 0],
+    [1, 0, 0, -1.5,
+     0, 1, 0, 0,
+     0, 0, 1, 0],
+    [1, 0, 0, 1.5,
+     0, 1, 0, 0,
+     0, 0, 1, 0],
+    [1, 0, 0, 4.5,
+     0, 1, 0, 0,
+     0, 0, 1, 0],
+], dtype=np.float32).reshape(INST_COUNT, 3, 4)
+
+
+#------------------------------------------------------------------------------
+# GLFW callbacks
+#------------------------------------------------------------------------------
+def mouse_button_callback(window, button, action, mods):
+    state = glfw.get_window_user_pointer(window)
+    (x, y) = glfw.get_cursor_pos(window)
+    if action is glfw.PRESS:
+        state.mouse_button = button
+        state.trackball.start_tracking(x, y)
+    else:
+        state.mouse_button = -1
+
+def cursor_position_callback(window, x, y):
+    state = glfw.get_window_user_pointer(window)
+    if state.mouse_button is glfw.MOUSE_BUTTON_LEFT:
+        state.trackball.view_mode = TrackballViewMode.LookAtFixed
+        state.trackball.update_tracking(x, y, state.params.width, state.params.height)
+        state.camera_changed = True
+    elif state.mouse_button is glfw.MOUSE_BUTTON_RIGHT:
+        state.trackball.view_mode = TrackballViewMode.EyeFixed
+        state.trackball.update_tracking(x, y, state.params.width, state.params.height)
+        state.camera_changed = True
+
+def window_size_callback(window, res_x, res_y):
+    state = glfw.get_window_user_pointer(window)
+    if state.minimized:
+        return
+
+    res_x = max(res_x, 1)
+    res_y = max(res_y, 1)
+
+    state.params.width = res_x
+    state.params.height = res_y
+    state.camera_changed = True
+    state.resize_dirty = True
+
+def window_iconify_callback(window, iconified):
+    state = glfw.get_window_user_pointer(window)
+    state.minimized = (iconified > 0)  # while set, window_size_callback ignores size changes
+
+def key_callback(window, key, scancode, action, mods):
+    if action is glfw.PRESS:
+        if key in {glfw.KEY_Q, glfw.KEY_ESCAPE}:
+            glfw.set_window_should_close(window, True)
+
+def scroll_callback(window, xscroll, yscroll):
+    state = glfw.get_window_user_pointer(window)
+    if state.trackball.wheel_event(yscroll):  # a truthy result marks the camera as changed
+        state.camera_changed = True
+
+#------------------------------------------------------------------------------
+# Helper functions
+#------------------------------------------------------------------------------
+def init_launch_params(state):
+    state.params.frame_buffer = 0  # replaced by the mapped buffer pointer in launch_subframe
+    state.params.subframe_index = 0
+
+def handle_camera_update(state):
+    if not state.camera_changed:
+        return
+    state.camera_changed = False
+
+    camera = state.camera
+    params = state.params
+
+    camera.aspect_ratio = params.width / float(params.height)
+    params.eye = camera.eye
+
+    u,v,w = camera.uvw_frame()
+    params.u = u
+    params.v = v
+    params.w = w
+
+def handle_resize(output_buffer, state):
+    if not state.resize_dirty:
+        return
+    state.resize_dirty = False
+
+    output_buffer.resize(state.params.width, state.params.height)
+
+def update_state(output_buffer, state):
+    handle_camera_update(state)  # no-op unless state.camera_changed
+    handle_resize(output_buffer, state)  # no-op unless state.resize_dirty
+
+def launch_subframe(output_buffer, state):
+    state.params.frame_buffer = output_buffer.map()
+
+    state.pipeline.launch(state.sbt, dimensions=state.launch_dimensions,
+            params=state.params.handle, stream=output_buffer.stream)
+
+    output_buffer.unmap()
+
+def display_subframe(output_buffer, gl_display, window):
+    (framebuf_res_x, framebuf_res_y) = glfw.get_framebuffer_size(window)
+    gl_display.display( output_buffer.width, output_buffer.height,
+                        framebuf_res_x, framebuf_res_y,
+                        output_buffer.get_pbo() )
+
+def init_camera_state(state):
+    camera = state.camera
+    camera.eye = (0, 1, -20)
+    camera.look_at = (0, 0, 0)
+    camera.up = (0, 1, 0)
+    camera.fov_y = 35
+    camera_changed = True
+
+    trackball = state.trackball
+    trackball.move_speed = 10.0
+    trackball.set_reference_frame([1,0,0], [0,0,1], [0,1,0])
+    trackball.reinitialize_orientation_from_camera()
+
+def create_context(state):
+    logger = ox.Logger(log)
+    ctx = ox.DeviceContext(validation_mode=False, log_callback_function=logger, log_callback_level=4)
+    ctx.cache_enabled = False
+    state.ctx = ctx
+
+def generate_animated_vertices(out_vertices, animation_mode, time, width, height):
+    threads_per_block = 128
+    num_blocks = (width*height + threads_per_block - 1) // threads_per_block
+
+    args = (out_vertices, np.int32(animation_mode.value), np.float32(time), np.int32(width), np.int32(height))
+
+    state.generate_vertices_kernel(grid=(num_blocks,1,1), block=(threads_per_block,1,1), args=args)
+
+
+def launch_generate_animated_vertices(state, animation_mode):
+    generate_animated_vertices(state.d_temp_vertices, animation_mode, state.time, g_tessellation_resolution, g_tessellation_resolution)
+
+def update_mesh_accel(state):
+    """Bring the animated spheres to state.time and update the IAS; the first sphere is static."""
+
+    # second sphere moves by updating its transform matrix (row 1, last column; presumably the y translation)
+    transform = state.ias_build_input.get_transform_view(1)
+    transform[1,-1] = np.sin(4*state.time)
+
+    # third sphere deforms: regenerate the shared vertex buffer, then update its GAS from it
+    launch_generate_animated_vertices(state, AnimationMode.DEFORM)
+    state.deforming_gas.update(state.gas_build_input)
+
+    # fourth sphere explodes; this overwrites the same vertex buffer, so it has to follow the update above
+    launch_generate_animated_vertices(state, AnimationMode.EXPLODE)
+
+    # we occasionally rebuild the exploding sphere to maintain AS quality
+    if state.time - state.last_exploding_sphere_rebuild_time > 1 / g_exploding_gas_rebuild_frequency:
+        state.last_exploding_sphere_rebuild_time = state.time
+        state.exploding_gas = ox.AccelerationStructure(state.ctx, state.gas_build_input,
+                compact=True, allow_update=True, random_vertex_access=True)
+        state.ias_build_input.instances[3].update_traversable(state.exploding_gas)  # point instance 3 at the new GAS
+        state.ias_build_input.update_instance(3)
+    else:
+        state.exploding_gas.update(state.gas_build_input)
+
+    state.ias.update(state.ias_build_input)
+
+def build_vertex_generation_kernel(state):
+    """Create the cp.RawKernel (NVRTC backend) for 'generate_vertices' and store it on state."""
+    cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry_vertex_generation.cu')
+    # Add the source file's own directory to the include search path.
+    include_flag = f'-I{os.path.dirname(cuda_source)}'
+    build_flags = ox.module.get_default_nvrtc_compile_flags() + (include_flag,)
+
+    with open(cuda_source, 'r') as source_file:
+        kernel_code = source_file.read()
+    state.generate_vertices_kernel = cp.RawKernel(code=kernel_code, backend='nvrtc',
+            options=build_flags, name='generate_vertices')
+
+def build_mesh_accel(state):
+    """Build the static, deforming and exploding sphere GASes plus the IAS over INST_COUNT instances."""
+    # Allocate temporary space for vertex generation.
+    # The same memory space is reused for generating the deformed and exploding vertices before updates.
+    vertex_count = g_tessellation_resolution * g_tessellation_resolution * 6
+    state.d_temp_vertices = cp.empty(shape=(vertex_count,3), dtype=np.float32)
+
+    # Build static triangulated sphere.
+    build_vertex_generation_kernel(state)
+    launch_generate_animated_vertices(state, AnimationMode.NONE)
+
+    #V = cp.asnumpy(state.d_temp_vertices)
+    #import trimesh
+    #trimesh.Trimesh(vertices=V, faces=np.arange(V.shape[0]).reshape(-1,3)).show()
+
+    # Build an AS over the triangles.
+    # We use un-indexed triangles so we can explode the sphere per triangle.
+    state.gas_build_input = ox.BuildInputTriangleArray(state.d_temp_vertices, flags=[ox.GeometryFlags.NONE])
+
+    def _make_gas(allow_update):
+        return ox.AccelerationStructure(state.ctx, state.gas_build_input,
+                compact=True, allow_update=allow_update, random_vertex_access=True)
+
+    state.static_gas = _make_gas(False)
+    state.deforming_gas = _make_gas(True)
+    state.exploding_gas = _make_gas(True)
+
+    # Instances 0 and 1 share the static GAS; 2 and 3 get the updatable ones.
+    traversables = [state.static_gas, state.static_gas,
+                    state.deforming_gas, state.exploding_gas]
+    state.ias_build_input = ox.BuildInputInstanceArray([
+        ox.Instance(traversable=traversables[i], instance_id=0, flags=ox.InstanceFlags.NONE,
+                sbt_offset=i, transform=g_instances[i])
+        for i in range(INST_COUNT)])
+
+    state.ias = ox.AccelerationStructure(context=state.ctx,
+            build_inputs=state.ias_build_input, compact=True, allow_update=True)
+    state.params.trav_handle = state.ias.handle
+
+
+def create_module(state):
+    """Compile cuda/dynamic_geometry.cu into state.module and keep the pipeline compile options on state."""
+    # exception_flags must be a plain ExceptionFlags value: a trailing comma after the
+    # DEBUG expression would build a 1-tuple, which has no `.value` for PipelineCompileOptions.
+    exception_flags = (ox.ExceptionFlags.DEBUG | ox.ExceptionFlags.TRACE_DEPTH | ox.ExceptionFlags.STACK_OVERFLOW
+                       if DEBUG else ox.ExceptionFlags.NONE)
+
+    pipeline_opts = ox.PipelineCompileOptions(
+            uses_motion_blur=False,
+            uses_primitive_type_flags = ox.PrimitiveTypeFlags.TRIANGLE,
+            traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_LEVEL_INSTANCING,
+            exception_flags=exception_flags,
+            num_payload_values=3,
+            num_attribute_values=2,
+            pipeline_launch_params_variable_name="params")
+
+    compile_opts = ox.ModuleCompileOptions(
+            max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT,
+            opt_level=ox.CompileOptimizationLevel.DEFAULT,
+            debug_level=ox.CompileDebugLevel.LINEINFO)
+    cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry.cu')
+    state.module = ox.Module(state.ctx, cuda_source, compile_opts, pipeline_opts)
+    state.pipeline_opts = pipeline_opts
+
+def create_program_groups(state):
+    """Create the raygen, miss and closest-hit program groups from state.module."""
+    context, optix_module = state.ctx, state.module
+    state.raygen_grp = ox.ProgramGroup.create_raygen(context, optix_module, "__raygen__rg")
+    state.miss_grp = ox.ProgramGroup.create_miss(context, optix_module, "__miss__ms")
+    state.hit_grp = ox.ProgramGroup.create_hitgroup(context, optix_module, entry_function_CH="__closesthit__ch")
+
+def create_pipeline(state):
+    """Link the raygen, miss and hit groups into state.pipeline and compute its stack sizes."""
+    link_opts = ox.PipelineLinkOptions(max_trace_depth=1,
+                                       debug_level=ox.CompileDebugLevel.LINEINFO)
+
+    pipeline = ox.Pipeline(state.ctx,
+                           compile_options=state.pipeline_opts,
+                           link_options=link_opts,
+                           program_groups=[state.raygen_grp, state.miss_grp, state.hit_grp],
+                           max_traversable_graph_depth=2)
+
+    # Positional arguments of compute_stack_sizes, in call order.
+    max_trace_depth, max_cc_depth, max_dc_depth = 1, 0, 0
+    pipeline.compute_stack_sizes(max_trace_depth, max_cc_depth, max_dc_depth)
+
+    # Stored on state only after the stack sizes were computed.
+    state.pipeline = pipeline
+
+def create_sbt(state):
+    """Assemble state.sbt from one raygen record, one miss record and INST_COUNT hit records."""
+    raygen_record = ox.SbtRecord(state.raygen_grp)
+
+    miss_record = ox.SbtRecord(state.miss_grp, names=('bg_color',), formats=('4f4',))
+    miss_record['bg_color'] = [0.0, 0.0, 0.0, 0.0]
+
+    # Every instance uses the same hit group; only the 'color' field differs.
+    hit_records = ox.SbtRecord([state.hit_grp]*INST_COUNT, names=('color',), formats=('3f4',))
+    for slot in range(INST_COUNT):
+        hit_records['color'][slot] = g_diffuse_colors[slot]
+
+    state.sbt = ox.ShaderBindingTable(raygen_record=raygen_record,
+                                      miss_records=miss_record,
+                                      hitgroup_records=hit_records)
+
+#------------------------------------------------------------------------------
+# Main
+#------------------------------------------------------------------------------
+if __name__ == '__main__':
+    state = DynamicGeometryState()
+    state.params.width = 1024
+    state.params.height = 768
+    state.time = 0.0
+
+    num_frames = 16
+    animation_time = 1.0
+
+    buffer_format = BufferImageFormat.UCHAR4
+    output_buffer_type = CudaOutputBufferType.CUDA_DEVICE
+
+    init_camera_state(state)
+    create_context(state)
+    create_module(state)
+    create_program_groups(state)
+    create_pipeline(state)
+    create_sbt(state)
+    init_launch_params(state)
+    build_mesh_accel(state)
+
+    window, impl = init_ui("optixDynamicGeometry", state.params.width, state.params.height)
+    glfw.set_mouse_button_callback(window, mouse_button_callback)
+    glfw.set_cursor_pos_callback(window, cursor_position_callback)
+    glfw.set_window_size_callback(window, window_size_callback)
+    glfw.set_window_iconify_callback(window, window_iconify_callback)
+    glfw.set_key_callback(window, key_callback)
+    glfw.set_scroll_callback(window, scroll_callback)
+    glfw.set_window_user_pointer(window, state)
+
+    output_buffer = CudaOutputBuffer(output_buffer_type, buffer_format,
+            state.params.width, state.params.height)
+
+    gl_display = GLDisplay(buffer_format)
+    # Per-stage wall-clock accumulators; cleared whenever display_stats() returns a true value.
+    state_update_time = 0.0
+    render_time = 0.0
+    display_time = 0.0
+
+    tstart = glfw.get_time()
+
+    state.last_exploding_sphere_rebuild_time = 0.0
+
+    while not glfw.window_should_close(window):
+        t0 = glfw.get_time()
+        glfw.poll_events()
+
+        state.time = glfw.get_time() - tstart
+
+        update_mesh_accel(state)
+
+        update_state(output_buffer, state)
+
+        t1 = glfw.get_time()
+        state_update_time += t1 - t0
+        t0 = t1
+
+        launch_subframe(output_buffer, state)
+        t1 = glfw.get_time()
+        render_time += t1 - t0
+        t0 = t1
+        # Read the clock again after displaying: reusing t1 (== t0 here) would always add zero.
+        display_subframe(output_buffer, gl_display, window)
+        display_time += glfw.get_time() - t0
+
+        if display_stats(state_update_time, render_time, display_time):
+            state_update_time = 0.0
+            render_time = 0.0
+            display_time = 0.0
+
+        imgui.render()
+        impl.render(imgui.get_draw_data())
+
+        glfw.swap_buffers(window)
+
+        state.params.subframe_index = state.params.subframe_index.item() + 1
+
+    impl.shutdown()
+    glfw.terminate()
diff --git a/examples/dynamic_materials.py b/examples/dynamic_materials.py
new file mode 100644
index 0000000..cf45988
--- /dev/null
+++ b/examples/dynamic_materials.py
@@ -0,0 +1,399 @@
+import os, sys, logging, collections
+
+import cupy as cp
+import numpy as np
+import optix as ox
+
+import glfw, imgui
+
+from optix.sutil.gui import init_ui, display_text
+from optix.sutil.camera import Camera
+from optix.sutil.gl_display import GLDisplay
+from optix.sutil.cuda_output_buffer import CudaOutputBuffer, CudaOutputBufferType, BufferImageFormat
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
+log = logging.getLogger()
+
+DEBUG=False
+
+
+class Params:
+    """Attribute-style wrapper around an ox.LaunchParamsRecord: field names read/write the record, only `handle` is a real attribute."""
+    _params = collections.OrderedDict([
+            ('trav_handle',  'u8'),
+            ('image',        'u8'),
+            ('image_width',  'u4'),
+            ('image_height', 'u4'),
+            ('radius',       'f4'),
+            ('cam_eye',      '3f4'),
+            ('camera_u',     '3f4'),
+            ('camera_v',     '3f4'),
+            ('camera_w',     '3f4'),
+        ])
+
+    def __init__(self):
+        field_names = tuple(self._params.keys())
+        self.handle = ox.LaunchParamsRecord(names=field_names, formats=tuple(self._params.values()))
+
+    def __getattribute__(self, name):
+        if name not in Params._params.keys():
+            return super().__getattribute__(name)
+        return self.__dict__['handle'][name]
+
+    def __setattr__(self, name, value):
+        if name in Params._params.keys():
+            self.handle[name] = value
+        elif name == 'handle':
+            super().__setattr__(name, value)
+        else:
+            raise AttributeError(name)
+
+    def __str__(self):
+        return '\n'.join(f'{field}:  {self.handle[field]}' for field in self._params)
+
+
+class MaterialIndex:
+    """Wrap-around counter: values assigned to `index` are stored modulo max_index."""
+    def __init__(self, max_index):
+        self._max_index = max_index
+        self._index = 0
+    def _get_index(self):
+        return self._index
+    def _set_index(self, value):
+        assert value >= 0, value
+        self._index = int(value % self._max_index)
+    index = property(_get_index, _set_index)
+
+    def nextval(self):
+        self.index += 1
+        return self.index
+
+
+class SampleState:
+    """Slot-only container for the sample's OptiX objects, launch params and pending-update flags."""
+    __slots__ = ['params', 'ctx', 'gas', 'ias', 'module',
+                 'raygen_grp', 'miss_grp', 'hit_grps',
+                 'raygen_sbt', 'miss_sbt', 'hit_sbts',
+                 'sbt', 'pipeline', 'pipeline_opts',
+                 'material_index_0', 'material_index_1', 'material_index_2',
+                 'has_data_changed', 'has_offset_changed', 'has_sbt_changed']
+
+    def __init__(self, width, height):
+        # Give every slot a value up front; the remaining ones are filled in below.
+        for slot_name in self.__slots__:
+            setattr(self, slot_name, None)
+
+        self.params = Params()
+        self.params.image_width, self.params.image_height = width, height
+
+        self.material_index_0 = MaterialIndex(3)
+        self.material_index_1 = MaterialIndex(2)
+        self.material_index_2 = MaterialIndex(3)
+        self.has_data_changed = self.has_offset_changed = self.has_sbt_changed = False
+
+    @property
+    def launch_dimensions(self):
+        width, height = self.params.image_width, self.params.image_height
+        return (int(width), int(height))
+
+
+def key_callback(window, key, scancode, action, mods):
+    """GLFW key handler: Q/Escape request window close, LEFT/RIGHT/UP raise a pending-update flag on the state."""
+    state = glfw.get_window_user_pointer(window)
+    if action != glfw.PRESS:
+        return
+    # Arrow keys only record the request; update_state() acts on the flags.
+    flag_by_key = {glfw.KEY_LEFT: 'has_data_changed', glfw.KEY_RIGHT: 'has_sbt_changed', glfw.KEY_UP: 'has_offset_changed'}
+    if key in {glfw.KEY_Q, glfw.KEY_ESCAPE}:
+        glfw.set_window_should_close(window, True)
+    elif key in flag_by_key:
+        setattr(state, flag_by_key[key], True)
+
+
+# Transforms for instances - one on the left (sphere 0), one in the center and one on the right (sphere 2).
+transforms = np.asarray([
+    [1, 0, 0, -6,
+     0, 1, 0, 0,
+     0, 0, 1, -10],
+    [1, 0, 0, 0,
+     0, 1, 0, 0,
+     0, 0, 1, -10],
+    [1, 0, 0, 6,
+     0, 1, 0, 0,
+     0, 0, 1, -10],
+], dtype=np.float32).reshape(3,3,4)  # three 3x4 matrices, one per instance; build_ias() passes transforms[i] to instance i
+
+# Offsets into SBT for each instance. Hence this needs to be in sync with transforms!
+# The middle sphere has two SBT records, the two other instances have one each.
+sbt_offsets = np.asarray([0, 1, 3], dtype=np.uint32)  # element 1 is rewritten by update_instance_offset()
+
+g_colors = np.asarray([[1, 0, 0],
+                       [0, 1, 0],
+                       [0, 0, 1]], dtype=np.float32)  # rows are copied into the hit records' 'color' field
+
+##------------------------------------------------------------------------------
+##
+## Helper Functions
+##
+##------------------------------------------------------------------------------
+
+def init_camera(state):
+    """Configure a fixed camera and copy its eye point and UVW frame into the launch params."""
+    camera = Camera()
+    camera.eye = (0, 0, 3)
+    camera.look_at = (0, 0, 0)
+    camera.up = (0, 1, 0)
+    camera.fov_y = 60
+    camera.aspect_ratio = state.params.image_width  / state.params.image_height
+
+    # uvw_frame() yields the three vectors stored as camera_u, camera_v and camera_w.
+    launch_params = state.params
+    launch_params.camera_u, launch_params.camera_v, launch_params.camera_w = camera.uvw_frame()
+    launch_params.cam_eye = camera.eye
+
+def create_context(state):
+    """Create the DeviceContext with validation mode on, logging through `log`, and its cache disabled."""
+    context = ox.DeviceContext(validation_mode=True, log_callback_function=ox.Logger(log), log_callback_level=4)
+    context.cache_enabled = False
+    state.ctx = context
+
+def build_gas(state):
+    """Build state.gas from a single custom-primitive AABB and store the radius 1.5 in the launch params."""
+    aabb = cp.asarray([[-1.5, -1.5, -1.5, 1.5, 1.5, 1.5]], dtype=np.float32)
+    state.gas = ox.AccelerationStructure(state.ctx, [ox.BuildInputCustomPrimitiveArray([aabb], num_sbt_records=1, flags=[ox.GeometryFlags.NONE])], compact=True)
+    state.params.radius = 1.5
+
+def build_ias(state):
+    """Build the IAS with one instance of state.gas per entry in `transforms` and publish its handle."""
+    # transforms[i] and sbt_offsets[i] both describe instance i, so the two arrays must stay in sync.
+    instances = [ox.Instance(traversable=state.gas, instance_id=0,
+                             sbt_offset=sbt_offsets[i], transform=transforms[i])
+                 for i in range(transforms.shape[0])]
+
+    instance_input = ox.BuildInputInstanceArray(instances)
+    state.ias = ox.AccelerationStructure(context=state.ctx, build_inputs=instance_input)
+    state.params.trav_handle = state.ias.handle
+
+def create_module(state):
+    """Compile cuda/dynamic_materials.cu into state.module and keep the pipeline compile options on state."""
+    # Exception checks are only enabled when the module-level DEBUG switch is set.
+    exception_flags = (ox.ExceptionFlags.DEBUG | ox.ExceptionFlags.TRACE_DEPTH | ox.ExceptionFlags.STACK_OVERFLOW
+                       if DEBUG else ox.ExceptionFlags.NONE)
+
+    pipeline_opts = ox.PipelineCompileOptions(
+            uses_motion_blur=False,
+            traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_LEVEL_INSTANCING,
+            uses_primitive_type_flags=ox.PrimitiveTypeFlags.CUSTOM,
+            num_payload_values=3,
+            num_attribute_values=3,
+            exception_flags=exception_flags,
+            pipeline_launch_params_variable_name="params")
+
+    compile_opts = ox.ModuleCompileOptions(
+            max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT,
+            opt_level=ox.CompileOptimizationLevel.DEFAULT,
+            debug_level=ox.CompileDebugLevel.LINEINFO)
+
+    source_path = os.path.join(script_dir, 'cuda', 'dynamic_materials.cu')
+    state.module = ox.Module(state.ctx, source_path, compile_opts, pipeline_opts)
+    state.pipeline_opts = pipeline_opts
+
+def create_program_groups(state):
+    """Create the raygen and miss groups plus one hit group per closest-hit entry point."""
+    ctx, module = state.ctx, state.module
+
+    state.raygen_grp = ox.ProgramGroup.create_raygen(ctx, module, "__raygen__rg")
+    state.miss_grp = ox.ProgramGroup.create_miss(ctx, module, "__miss__ms")
+
+    # Closest-hit entry points, in the order the hit groups are stored:
+    #   [0]    the left sphere's single CH program
+    #   [1:3]  the two CH programs the middle sphere toggles between
+    #   [3:6]  the CH programs the right sphere picks from via state.material_index_2.index
+    ch_names = ('__closesthit__ch' ,
+                '__closesthit__ch', '__closesthit__normal',
+                '__closesthit__blue', '__closesthit__green', '__closesthit__red')
+
+    # All hit groups share the same intersection program.
+    state.hit_grps = [
+        ox.ProgramGroup.create_hitgroup(ctx, module,
+                                        entry_function_CH=ch_name,
+                                        entry_function_IS='__intersection__is')
+        for ch_name in ch_names
+    ]
+
+def create_pipeline(state):
+    """Link the raygen, miss and all hit groups into state.pipeline and compute its stack sizes."""
+    program_grps = [state.raygen_grp, state.miss_grp, *state.hit_grps]
+
+    link_opts = ox.PipelineLinkOptions(max_trace_depth=1,
+                                       debug_level=ox.CompileDebugLevel.FULL)
+
+    pipeline = ox.Pipeline(state.ctx,
+                           compile_options=state.pipeline_opts,
+                           link_options=link_opts,
+                           program_groups=program_grps,
+                           max_traversable_graph_depth=2)
+
+    # Stored on state only after the stack sizes were computed.
+    stack_depths = (1, 0, 0)  # max_trace_depth, max_cc_depth, max_dc_depth
+    pipeline.compute_stack_sizes(*stack_depths)
+    state.pipeline = pipeline
+
+def create_sbt(state):
+    """Create the raygen, miss and four hit-group records, keep them on state and build state.sbt."""
+    raygen_sbt = ox.SbtRecord(state.raygen_grp)
+
+    miss_sbt = ox.SbtRecord(state.miss_grp, names=('color',), formats=('3f4',))
+    miss_sbt['color'] = [0.3, 0.1, 0.2]
+
+    # Record 3 starts with the hit group selected by material_index_2 (offset by the three groups before it).
+    hit_grps = state.hit_grps
+    hit_groups = [hit_grps[0], hit_grps[1], hit_grps[2], hit_grps[state.material_index_2.index + 3]]
+    hit_sbts = ox.SbtRecord(hit_groups, names=('color', 'idx'), formats=('3f4', 'u4'))
+
+    # (color, idx) payload of each hit record:
+    record_payloads = (
+        # The left sphere cycles through three colors by updating the data field of the SBT record.
+        (g_colors[0], 0),
+        # The middle sphere toggles between two SBT records by adjusting the SBT
+        # offset field of the sphere instance. The IAS needs to be rebuilt for the
+        # update to take effect.
+        (g_colors[1], 1),
+        (g_colors[1], 1),
+        # The right sphere cycles through colors by modifying the SBT. On update, a
+        # different prebuilt CH program is packed into the corresponding SBT
+        # record.
+        ([0,0,0], 2),
+    )
+    for slot, (color, idx) in enumerate(record_payloads):
+        hit_sbts['color'][slot] = color
+        hit_sbts['idx'][slot] = np.uint32(idx)
+
+    state.raygen_sbt = raygen_sbt
+    state.miss_sbt = miss_sbt
+    state.hit_sbts = hit_sbts
+
+    state.sbt = ox.ShaderBindingTable(raygen_record=raygen_sbt, miss_records=miss_sbt,
+            hitgroup_records=hit_sbts)
+
+
+def update_state(output_buffer, state):
+    """Run the update routine of every material-change flag that is currently set (output_buffer is unused)."""
+    # Change the material properties using one of three different approaches.
+    for flag_name, apply_update in (('has_data_changed', update_hit_group_data),
+                                    ('has_offset_changed', update_instance_offset),
+                                    ('has_sbt_changed', update_sbt_header)):
+        if getattr(state, flag_name):
+            apply_update(state)
+
+def update_hit_group_data(state):
+    """Method 1: recolor the left sphere by rewriting the data of the first hit record."""
+    # Change the material parameters for the left sphere by directly modifying
+    # the HitGroupData for the first SBT record.
+
+    # Cycle through three base colors.
+    next_color = g_colors[state.material_index_0.nextval()]
+
+    # Update the data field of the SBT record for the left sphere with the new base color,
+    # then build a new shader binding table from the stored records.
+    state.hit_sbts['color'][0] = next_color
+    state.sbt = ox.ShaderBindingTable(raygen_record=state.raygen_sbt, miss_records=state.miss_sbt,
+            hitgroup_records=state.hit_sbts)
+    state.has_data_changed = False
+
+def update_instance_offset(state):
+    """Method 2: switch the middle sphere to its other SBT record by changing the instance's SBT offset."""
+    # The offset is used to select an SBT record during traversal, which
+    # determines the CH & AH programs that will be invoked for shading.
+
+    # The middle sphere's records start at offset 1, hence the base of 1
+    # added to the toggling material index.
+    sbt_offsets[1] = 1 + state.material_index_1.nextval()
+
+    # It's necessary to rebuild the IAS for the updated offset to take effect.
+    build_ias(state)
+
+    state.has_offset_changed = False
+
+def update_sbt_header(state):
+    """Method 3: give hit record 3 (the right sphere) the next closest-hit program group."""
+    # Select a new material by re-packing the SBT header for the right sphere
+    # with a different CH program.
+
+    # The right sphere will use the next compiled program group; its
+    # candidates start at index 3 of state.hit_grps.
+    next_group = state.hit_grps[3 + state.material_index_2.nextval()]
+    state.hit_sbts.update_program_group(3, next_group)
+
+    state.sbt = ox.ShaderBindingTable(raygen_record=state.raygen_sbt, miss_records=state.miss_sbt,
+            hitgroup_records=state.hit_sbts)
+
+    state.has_sbt_changed = False
+
+def launch(state, output_buffer):
+    """Map the output buffer, store what map() returns in params.image, launch the pipeline, then unmap."""
+    launch_params = state.params
+    launch_params.image = output_buffer.map()
+    state.pipeline.launch(state.sbt, dimensions=state.launch_dimensions,
+            params=launch_params.handle, stream=output_buffer.stream)
+    output_buffer.unmap()
+
+def display(output_buffer, gl_display, window):
+    """Hand the output buffer's PBO to gl_display together with the window's framebuffer size."""
+    fb_width, fb_height = glfw.get_framebuffer_size(window)
+    # Arguments: buffer size, framebuffer size, then the pixel buffer object.
+    gl_display.display(output_buffer.width, output_buffer.height, fb_width, fb_height, output_buffer.get_pbo())
+
+
+def display_usage():
+    """Draw the key-binding help text inside its own imgui frame."""
+    usage = ("Use the arrow keys to modify the materials\n"
+             "  [LEFT]  left sphere\n"
+             "  [UP]    middle sphere\n"
+             "  [RIGHT] right sphere")
+    imgui.new_frame()
+    display_text(usage, 20.0, 20.0)
+    imgui.end_frame()
+
+if __name__ == '__main__':
+    state = SampleState(1024, 768)  # image width and height of the launch
+
+    buffer_format = BufferImageFormat.UCHAR4
+    output_buffer_type = CudaOutputBufferType.CUDA_DEVICE
+    # Scene and pipeline setup; each step stores its result on `state`.
+    init_camera(state)
+    create_context(state)
+    build_gas(state)
+    build_ias(state)
+    create_module(state)
+    create_program_groups(state)
+    create_pipeline(state)
+    create_sbt(state)
+
+    window, impl = init_ui("optixDynamicMaterials", state.params.image_width, state.params.image_height)
+
+    glfw.set_key_callback(window, key_callback)
+    glfw.set_window_user_pointer(window, state)  # key_callback reads the state back from the window
+
+    output_buffer = CudaOutputBuffer(output_buffer_type, buffer_format,
+            state.params.image_width, state.params.image_height)
+
+    gl_display = GLDisplay(buffer_format)
+    # Per frame: handle input, apply pending material changes, render, display, draw the overlay.
+    while not glfw.window_should_close(window):
+        glfw.poll_events()
+
+        update_state(output_buffer, state)
+        launch(state, output_buffer)
+        display(output_buffer, gl_display, window)
+        display_usage()
+
+        imgui.render()
+        impl.render(imgui.get_draw_data())
+        glfw.swap_buffers(window)
+
+    impl.shutdown()
+    glfw.terminate()
+
diff --git a/examples/hello.py b/examples/hello.py
index 1973b0e..0370d2b 100644
--- a/examples/hello.py
+++ b/examples/hello.py
@@ -1,15 +1,20 @@
+import os, sys, logging
+
 import optix as ox
 import cupy as cp
 import numpy as np
+
 from PIL import Image, ImageOps
-import logging
-import sys
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
 log = logging.getLogger()
 
 def create_module(ctx, pipeline_opts):
     compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO)
-    module = ox.Module(ctx, 'cuda/hello.cu', compile_opts, pipeline_opts)
+    source = os.path.join(script_dir, 'cuda', 'hello.cu')
+    module = ox.Module(ctx, source, compile_opts, pipeline_opts)
     return module
 
 
diff --git a/examples/spheres.py b/examples/spheres.py
index 0adf57a..20acf42 100644
--- a/examples/spheres.py
+++ b/examples/spheres.py
@@ -1,14 +1,17 @@
-import optix as ox
+import os, sys, logging
+
 import cupy as cp
 import numpy as np
+import optix as ox
+
 from PIL import Image, ImageOps
-import logging
-import sys
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
 log = logging.getLogger()
 img_size = (1024, 768)
 
-
 def compute_spheres_bbox(centers, radii):
     out = cp.empty((centers.shape[0], 6), dtype='f4')
     out[:, :3] = centers - radii.reshape(-1, 1)
@@ -24,7 +27,8 @@ def create_acceleration_structure(ctx, bboxes):
 
 def create_module(ctx, pipeline_opts):
     compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO)
-    module = ox.Module(ctx, 'cuda/spheres.cu', compile_opts, pipeline_opts)
+    source = os.path.join(script_dir, 'cuda', 'spheres.cu')
+    module = ox.Module(ctx, source, compile_opts, pipeline_opts)
     return module
 
 
@@ -43,7 +47,7 @@ def create_pipeline(ctx, program_grps, pipeline_options):
     pipeline = ox.Pipeline(ctx, compile_options=pipeline_options, link_options=link_opts, program_groups=program_grps)
     pipeline.compute_stack_sizes(1,  # max_trace_depth
                                  0,  # max_cc_depth
-                                 1)  # max_dc_depth
+                                 0)  # max_dc_depth
     return pipeline
 
 
diff --git a/examples/triangle.py b/examples/triangle.py
index e3517d3..d3d9159 100644
--- a/examples/triangle.py
+++ b/examples/triangle.py
@@ -1,8 +1,13 @@
-import optix as ox
+import os
+
 import cupy as cp
 import numpy as np
+import optix as ox
+
 from PIL import Image, ImageOps
 
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
 img_size = (1024, 768)
 
 # use a regular function for logging
@@ -19,7 +24,8 @@ def create_acceleration_structure(ctx, vertices):
 
 def create_module(ctx, pipeline_opts):
     compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO)
-    module = ox.Module(ctx, 'cuda/triangle.cu', compile_opts, pipeline_opts)
+    source = os.path.join(script_dir, 'cuda', 'triangle.cu')
+    module = ox.Module(ctx, source, compile_opts, pipeline_opts)
     return module
 
 
@@ -118,5 +124,3 @@ def launch_pipeline(pipeline : ox.Pipeline, sbt, gas):
     img = img.reshape(img_size[1], img_size[0], 4)
     img = ImageOps.flip(Image.fromarray(img, 'RGBA'))
     img.show()
-
-
diff --git a/optix/build.pxd b/optix/build.pxd
index ac24bb3..854612a 100644
--- a/optix/build.pxd
+++ b/optix/build.pxd
@@ -183,7 +183,6 @@ cdef extern from "optix.h" nogil:
         unsigned int flags
         OptixTraversableHandle traversableHandle
 
-
     OptixResult optixAccelComputeMemoryUsage(OptixDeviceContext context,
                                  const OptixAccelBuildOptions * accelOptions,
                                  const OptixBuildInput * buildInputs,
@@ -282,7 +281,7 @@ cdef class Instance(OptixObject):
 
 cdef class BuildInputInstanceArray(BuildInputArray):
     cdef OptixBuildInputInstanceArray build_input
-    cdef object instances
+    cdef public object instances
     cdef object _d_instances
 
 
@@ -295,4 +294,4 @@ cdef class AccelerationStructure(OptixContextObject):
     cdef OptixTraversableHandle _handle
     cdef void _init_build_inputs(self, build_inputs, vector[OptixBuildInput]& ret)
     cdef void _init_accel_options(self, size_t num_build_inputs, unsigned int build_flags, OptixBuildOperation operation, vector[OptixAccelBuildOptions]& ret)
-    cdef void build(self, build_inputs, stream=*)
\ No newline at end of file
+    cdef void build(self, build_inputs, stream=*)
diff --git a/optix/build.pyx b/optix/build.pyx
index c594ae9..fb42afc 100644
--- a/optix/build.pyx
+++ b/optix/build.pyx
@@ -250,7 +250,9 @@ cdef class BuildInputCustomPrimitiveArray(BuildInputArray):
 
         self.build_input.aabbBuffers = self._d_aabb_buffer_ptrs.const_data()
         self.build_input.numPrimitives = shape[0]
-        self.build_input.strideInBytes = self._d_aabb_buffers[0].strides[0]
+        
+        # https://github.com/cupy/cupy/issues/5897
+        self.build_input.strideInBytes = 6*np.float32().itemsize
 
         self._flags.resize(num_sbt_records)
         if flags is None:
@@ -429,18 +431,24 @@ cdef class Instance(OptixObject):
         self.instance.instanceId = instance_id
         self.instance.flags = flags.value
         self.instance.sbtOffset = sbt_offset
-        visibility_mask = int(visibility_mask) if visibility_mask is not None else (2**(sizeof(unsigned int) * 8) - 1)
+
+        max_visibility_mask_bits = self.traversable.context.num_bits_instances_visibility_mask
+        visibility_mask = int(visibility_mask) if visibility_mask is not None else (2**max_visibility_mask_bits - 1)
         if visibility_mask.bit_length() > self.traversable.context.num_bits_instances_visibility_mask:
-            raise ValueError(f"Too many entries in visibility mask. Got {visibility_mask.bit_length()} but supported are only {self.traversable.context.num_bits_instances_visibility_mask}")
+            raise ValueError(f"Too many entries in visibility mask. Got {visibility_mask.bit_length()} but supported are only {max_visibility_mask_bits}")
         self.instance.visibilityMask = visibility_mask
 
+    def update_traversable(self, AccelerationStructure traversable):  # point this instance at a different acceleration structure
+        self.traversable = traversable  # keep a reference to the new AccelerationStructure
+        self.instance.traversableHandle = self.traversable.handle  # refresh the handle stored in this object's OptixInstance struct
+
     def __deepcopy__(self, memodict={}):
         from copy import deepcopy
         cls = self.__class__
         result = cls.__new__(cls)
         memodict[id(self)] = result
-        result._instance = self.instance
-        result._traversable = deepcopy(self.traversable)
+        result.instance = self.instance
+        result.traversable = deepcopy(self.traversable)
 
         return result
 
@@ -477,7 +485,15 @@ cdef class BuildInputInstanceArray(BuildInputArray):
 
     cdef size_t num_elements(self):
         return self.build_input.numInstances
+    
+    def update_instance(self, index):  # copy the OptixInstance of instances[index] into slot `index` of the device instance buffer
+        src_ptr = <size_t>&((<Instance>(self.instances[index])).instance)  # host address of that Instance's OptixInstance struct
+        dst_ptr = self._d_instances.ptr + index*sizeof(OptixInstance)  # address of slot `index` in _d_instances
+        cp.cuda.runtime.memcpy(dst_ptr, src_ptr, sizeof(OptixInstance), cp.cuda.runtime.memcpyHostToDevice)
 
+    def get_transform_view(self, index):  # (3,4) float32 cupy array over the first bytes of slot `index` in _d_instances
+        device_ptr = cp.cuda.MemoryPointer(mem=self._d_instances.mem, offset=<int>index*sizeof(OptixInstance))  # NOTE(review): assumes the transform is the first OptixInstance member - confirm against optix.h
+        return cp.ndarray(shape=(3,4), dtype=np.float32, memptr=device_ptr)  # built on the existing memory via memptr, not a fresh allocation
 
 
 cdef class AccelerationStructure(OptixContextObject):
@@ -738,7 +754,7 @@ cdef class AccelerationStructure(OptixContextObject):
         result._build_flags = self._build_flags
         result._buffer_sizes = self._buffer_sizes
         result._instances = deepcopy(self._instances) # copy all instances and their AccelerationStructures first
-
+    
         buffer_size = round_up(self._buffer_sizes.outputSizeInBytes, 8) + 8
         result._gas_buffer = cp.cuda.alloc(buffer_size)
         cp.cuda.runtime.memcpy(result._gas_buffer.ptr, self._gas_buffer.ptr, buffer_size, cp.cuda.runtime.memcpyDeviceToDevice)
diff --git a/optix/module.pxd b/optix/module.pxd
index 140c030..45509d2 100644
--- a/optix/module.pxd
+++ b/optix/module.pxd
@@ -72,4 +72,5 @@ cdef class Module(OptixContextObject):
     cdef OptixModule module
     cdef list _compile_flags
 
-    #cpdef size_t c_obj(self)
\ No newline at end of file
+    #cpdef size_t c_obj(self)
+
diff --git a/optix/module.pyx b/optix/module.pyx
index 93fe88e..7dd3326 100644
--- a/optix/module.pyx
+++ b/optix/module.pyx
@@ -63,6 +63,12 @@ cdef class ModuleCompileOptions(OptixObject):
 
 cdef tuple _nvrtc_compile_flags_default = ('-use_fast_math', '-lineinfo', '-default-device', '-std=c++11', '-rdc', 'true')
 
+def get_default_nvrtc_compile_flags(std=None, rdc=False):
+    """Return the default NVRTC flags as a tuple, with the C++ standard set by `std` and '-rdc true' only if `rdc`."""
+    # The last three defaults ('-std=c++11', '-rdc', 'true') are dropped and re-added on request.
+    std_flag = '-std=c++11' if std is None else f'-std=c++{std}'
+    rdc_flags = ('-rdc', 'true') if rdc else ()
+    return tuple(_nvrtc_compile_flags_default[:-3]) + (std_flag,) + rdc_flags
 
 cdef _is_ptx(src):
     if not isinstance(src, (bytes, bytearray)):
diff --git a/optix/pipeline.pxd b/optix/pipeline.pxd
index 0e70650..ceaac1d 100644
--- a/optix/pipeline.pxd
+++ b/optix/pipeline.pxd
@@ -171,4 +171,4 @@ cdef class Pipeline(OptixContextObject):
                                                  ProgramGroup program_group_miss_1,
                                                  object program_groups_closesthit_1,
                                                  ProgramGroup program_group_miss_2,
-                                                 object program_groups_closesthit_2)
\ No newline at end of file
+                                                 object program_groups_closesthit_2)
diff --git a/optix/pipeline.pyx b/optix/pipeline.pyx
index d3d41ae..b3a3142 100644
--- a/optix/pipeline.pyx
+++ b/optix/pipeline.pyx
@@ -76,7 +76,7 @@ cdef class PipelineCompileOptions(OptixObject):
         self.compile_options.numAttributeValues = num_attribute_values
         self.compile_options.exceptionFlags = exception_flags.value
         self.pipeline_launch_params_variable_name = pipeline_launch_params_variable_name
-        self.compile_options.usesPrimitiveTypeFlags = uses_primitive_type_flags.value
+        self.compile_options.usesPrimitiveTypeFlags = <int>(uses_primitive_type_flags.value)
 
     @property
     def uses_motion_blur(self):
@@ -420,4 +420,4 @@ cdef class Pipeline(OptixContextObject):
         cdef const OptixShaderBindingTable* c_sbt = &sbt.sbt
 
         with nogil:
-            optix_check_return(optixLaunch(self.pipeline, <CUstream>c_stream, d_params_ptr, c_itemsize, c_sbt, c_dims[0], c_dims[1], c_dims[2]))
\ No newline at end of file
+            optix_check_return(optixLaunch(self.pipeline, <CUstream>c_stream, d_params_ptr, c_itemsize, c_sbt, c_dims[0], c_dims[1], c_dims[2]))
diff --git a/optix/program_group.pyx b/optix/program_group.pyx
index cfc01a9..2d95c24 100644
--- a/optix/program_group.pyx
+++ b/optix/program_group.pyx
@@ -178,7 +178,7 @@ cdef class ProgramGroup(OptixContextObject):
         context: DeviceContext
             The context to use for this ProgramGroup.
         module: Module
-            The module containig the raygen function.
+            The module containing the raygen function.
         entry_function_name: str
             The name of the raygen function in the module.
 
@@ -199,7 +199,7 @@ cdef class ProgramGroup(OptixContextObject):
         context: DeviceContext
             The context to use for this ProgramGroup.
         module: Module
-            The module containig the miss function.
+            The module containing the miss function.
         entry_function_name: str
             The name of the miss function in the module.
 
@@ -220,7 +220,7 @@ cdef class ProgramGroup(OptixContextObject):
         context: DeviceContext
             The context to use for this ProgramGroup.
         module: Module
-            The module containig the exception function.
+            The module containing the exception function.
         entry_function_name: str
             The name of the exception function in the module.
 
@@ -361,4 +361,4 @@ cdef class ProgramGroup(OptixContextObject):
 
     @property
     def kind(self):
-        return self._kind
\ No newline at end of file
+        return self._kind
diff --git a/optix/struct.pxd b/optix/struct.pxd
index df12b10..e68a783 100644
--- a/optix/struct.pxd
+++ b/optix/struct.pxd
@@ -18,9 +18,9 @@ cdef class _StructHelper(OptixObject):
 
 
 cdef class SbtRecord(_StructHelper):
-    cdef ProgramGroup program_group
+    cdef list program_groups
     cdef str header_format
 
 
 cdef class LaunchParamsRecord(_StructHelper):
-    pass
\ No newline at end of file
+    pass
diff --git a/optix/struct.pyx b/optix/struct.pyx
index 9f35e5f..7f1e53f 100644
--- a/optix/struct.pyx
+++ b/optix/struct.pyx
@@ -22,7 +22,6 @@ def  _aligned_itemsize( formats, alignment ):
         } )
     return round_up( temp_dtype.itemsize, alignment )
 
-
 def array_to_device_memory(numpy_array, stream=None):
     """
     Transfer a numpy array to cuda device memory. This does not generate a full cupy.ndarray, but an
@@ -262,15 +261,22 @@ cdef class SbtRecord(_StructHelper):
     All options are the same as in the base class.
 .   The alignment parameter is ignored though and only present for the interface.
     """
-    def __init__(self, ProgramGroup program_group, names=(), formats=(), values=None, size=1, alignment=1):
+    def __init__(self, program_groups, names=(), formats=(), values=None):
+        program_groups = list(ensure_iterable(program_groups))
         names = ensure_iterable(names)
         formats = ensure_iterable(formats)
+        
+        if not all(isinstance(p, ProgramGroup) for p in program_groups):
+            raise TypeError("Only program groups")
+        
+        cdef unsigned int num_program_groups = len(program_groups)
+        
+        self.program_groups = program_groups
 
-        self.program_group = program_group
         header_format = '{}B'.format(OPTIX_SBT_RECORD_HEADER_SIZE)
         names = ('header',) + names
         formats = (header_format,) + formats
-        super().__init__(names, formats, values=values, size=size, alignment=OPTIX_SBT_RECORD_ALIGNMENT)
+        super().__init__(names, formats, values=values, size=num_program_groups, alignment=OPTIX_SBT_RECORD_ALIGNMENT)
 
     @cython.boundscheck(False)
     @cython.wraparound(False)
@@ -280,9 +286,18 @@ cdef class SbtRecord(_StructHelper):
         cdef size_t size = array.shape[0]
         cdef unsigned char[:, ::1] buffer =  array.view('B').reshape(-1, itemsize)
         for i in range(size):
-            optixSbtRecordPackHeader(self.program_group.program_group, <void *>(&buffer[i, 0]))
+            optixSbtRecordPackHeader((<ProgramGroup>self.program_groups[i]).program_group, <void *>(&buffer[i, 0]))
         return array
 
+    def update_program_group(self, i, program_group):  # swap the program group of record i and re-pack that record's header
+        if not isinstance(program_group, ProgramGroup):
+            raise TypeError("Expected a program group as second argument.")
+        self.program_groups[i] = program_group
+        
+        cdef size_t itemsize = self._array.dtype.itemsize
+        cdef unsigned char[:, ::1] buffer = self._array.view('B').reshape(-1, itemsize)  # byte view with one row per record
+        optixSbtRecordPackHeader((<ProgramGroup>self.program_groups[<size_t>i]).program_group, <void *>(&buffer[<size_t>i, 0]))  # NOTE(review): a negative i passes the list assignment above but presumably not the <size_t> cast -- confirm callers only pass i >= 0
+
 
 cdef class LaunchParamsRecord(_StructHelper):
     """
diff --git a/optix/sutil/__init__.py b/optix/sutil/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/optix/sutil/camera.py b/optix/sutil/camera.py
new file mode 100644
index 0000000..1697323
--- /dev/null
+++ b/optix/sutil/camera.py
@@ -0,0 +1,46 @@
+import numpy as np
+
+from optix.sutil.vecmath import length, normalize, cross
+from optix.sutil.properties import get_member, set_float, set_float3
+
+class Camera:
+    """Implements a perspective camera."""
+
+    __slots__ = ['_eye', '_look_at', '_up', '_fov_y', '_aspect_ratio']
+
+    def __init__(self, eye=None, look_at=None, up=None, fov_y=None, aspect_ratio=None):
+        self.eye = eye
+        self.look_at = look_at
+        self.up = up
+        self.fov_y = fov_y
+        self.aspect_ratio = aspect_ratio
+
+    eye = property(get_member("_eye"), set_float3("_eye", 1.0))
+    look_at = property(get_member("_look_at"), set_float3("_look_at", 0.0))
+    up = property(get_member("_up"), set_float3("_up", [0.0,1.0,0.0]))
+
+    fov_y = property(get_member("_fov_y"), set_float("_fov_y", 35.0))
+    aspect_ratio = property(get_member("_aspect_ratio"), set_float("_aspect_ratio", 1.0))
+
+    def _get_direction(self):
+        return normalize(self.look_at - self.eye)
+    def _set_direction(self, value):
+        self.look_at = self.eye + length(self.look_at - self.eye)*value;
+    direction = property(_get_direction, _set_direction)
+
+    def uvw_frame(self):
+        # do not normalize W -- it implies focal length
+        W = self.look_at - self.eye
+        wlen = length(W)
+        assert wlen > 0, (self.eye, self.look_at)
+
+        U = normalize(cross(W, self.up))
+        V = normalize(cross(U, W))
+
+        vlen = wlen * np.tan(0.5 * np.deg2rad(self.fov_y))
+        V *= vlen
+
+        ulen = vlen * self.aspect_ratio
+        U *= ulen
+
+        return (U,V,W)
diff --git a/optix/sutil/cuda_output_buffer.py b/optix/sutil/cuda_output_buffer.py
new file mode 100644
index 0000000..fe5e215
--- /dev/null
+++ b/optix/sutil/cuda_output_buffer.py
@@ -0,0 +1,216 @@
+import enum, re
+
+import numpy as np
+import cupy as cp
+
+import OpenGL.GL as gl
+
+from optix.sutil.vecmath import vtype_to_dtype
+
+class BufferImageFormat(enum.Enum):  # pixel layouts an output buffer can hold
+    UCHAR4=0
+    FLOAT3=1
+    FLOAT4=2
+
+    @property
+    def dtype(self):  # dtype obtained from vtype_to_dtype for the matching vector type name
+        if self is BufferImageFormat.UCHAR4:
+            return vtype_to_dtype('uchar4')
+        elif self is BufferImageFormat.FLOAT3:
+            return vtype_to_dtype('float3')
+        elif self is BufferImageFormat.FLOAT4:
+            return vtype_to_dtype('float4')
+        else:
+            raise NotImplementedError(self)  # only reachable if a member is added without a branch above
+
+    @property
+    def itemsize(self):  # itemsize of the dtype above
+        return self.dtype.itemsize
+
+class CudaOutputBufferType(enum.Enum):
+    CUDA_DEVICE = 0, # not preferred, typically slower than ZERO_COPY
+    GL_INTEROP  = 1, # single device only, preferred for single device
+    ZERO_COPY   = 2, # general case, preferred for multi-gpu if not fully nvlink connected
+    CUDA_P2P    = 3, # fully connected only, preferred for fully nvlink connected
+
+
+class CudaOutputBuffer:
+    __slots__ = ['_pixel_format', '_buffer_type', '_width', '_height',
+            '_device', '_device_idx', '_device', '_stream',
+            '_host_buffer', '_device_buffer', '_pbo']
+
+    def __init__(self, buffer_type, pixel_format, width, height, device_idx=0):  # None for buffer_type/pixel_format selects the setter defaults
+        for attr in self.__slots__:
+            setattr(self, attr, None)  # give every slot a value so the setters below can compare against it
+
+        self.device_idx = device_idx
+        self.pixel_format = pixel_format
+        self.buffer_type = buffer_type
+        self.resize(width, height)
+        self.stream = None  # the stream setter maps None to cp.cuda.Stream.null
+
+        self._reallocate_buffers()
+
+
+    def resize(self, width, height):  # the width/height setters drop the buffers when a value changes
+        self.width = width
+        self.height = height
+
+    def get_host_buffer(self):
+        if buffer_type is CudaOutputBufferType.CUDA_DEVICE:
+            self.copy_device_to_host()
+            return self._host_buffer
+        else:
+            msg = f'Buffer type {buffer_type} has not been implemented yet.'
+            raise NotImplementedError(msg)
+
+    def map(self):  # returns _device_buffer.data.ptr, allocating the buffers first if needed
+        self._make_current()
+        if (self._host_buffer is None) or (self._device_buffer is None):  # the setters reset both to None on any change
+            self._reallocate_buffers()
+        return self._device_buffer.data.ptr
+
+    def unmap(self):  # synchronizes the configured stream; only CUDA_DEVICE buffers are handled
+        self._make_current()
+        buffer_type = self.buffer_type
+        if buffer_type is CudaOutputBufferType.CUDA_DEVICE:
+            self._stream.synchronize()
+        else:
+            msg = f'Buffer type {buffer_type} has not been implemented yet.'
+            raise NotImplementedError(msg)
+
+    def get_pbo(self):  # returns the GL buffer id after refreshing it from the device buffer
+        buffer_type = self.buffer_type
+
+        self._make_current()
+
+        if self._pbo is None:
+            self._pbo = gl.glGenBuffers(1)  # created lazily on first use
+
+        if buffer_type is CudaOutputBufferType.CUDA_DEVICE:
+            self.copy_device_to_host()  # device -> host -> GL buffer, two copies per call
+            self.copy_host_to_pbo()
+        else:
+            msg = f'Buffer type {buffer_type} has not been implemented yet.'
+            raise NotImplementedError(msg)
+
+        return self._pbo
+
+    def delete_pbo(self):  # releases the GL buffer if one was created; no-op otherwise
+        if self._pbo is None:
+            return
+        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0)
+        gl.glDeleteBuffers(1, self._pbo)  # NOTE(review): passes the bare id rather than a sequence -- confirm PyOpenGL accepts this
+        self._pbo = None
+
+    def copy_device_to_host(self):  # copies _host_buffer.nbytes bytes from the device buffer into the host array
+        cp.cuda.runtime.memcpy(self._host_buffer.__array_interface__['data'][0],  # destination: address of the NumPy array's data
+                self._device_buffer.data.ptr, self._host_buffer.nbytes, cp.cuda.runtime.memcpyDeviceToHost)
+
+    def copy_host_to_pbo(self):  # uploads the host array into the GL buffer, then unbinds it
+        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._pbo)
+        gl.glBufferData(gl.GL_ARRAY_BUFFER, self._host_buffer, gl.GL_STREAM_DRAW)
+        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0)
+
+    def _make_current(self):  # calls use() on the cp.cuda.Device stored by the device_idx setter
+        self._device.use()
+
+    def _reallocate_buffers(self):  # allocates fresh host and device arrays for the current size and pixel format
+        buffer_type = self.buffer_type
+
+        dtype = self.pixel_format
+        shape = (self.height, self.width)  # rows first: (height, width)
+
+        if buffer_type is CudaOutputBufferType.CUDA_DEVICE:
+            self._host_buffer = np.empty(shape=shape, dtype=dtype)
+            self._device_buffer = cp.empty(shape=shape, dtype=dtype)
+            if self._pbo is not None:  # an existing GL buffer is refilled from the new host array
+                gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._pbo)
+                gl.glBufferData(gl.GL_ARRAY_BUFFER, self._host_buffer, gl.GL_STREAM_DRAW)
+                gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0)
+        else:
+            msg = f'Buffer type {buffer_type} has not been implemented yet.'
+            raise NotImplementedError(msg)
+
+    def _get_pixel_format(self):
+        return self._pixel_format
+    def _set_pixel_format(self, value):  # accepts None, a BufferImageFormat, a vector type name, or a dtype
+        if value is None:
+            value = BufferImageFormat.UCHAR4  # default format
+        if isinstance(value, BufferImageFormat):
+            value = value.dtype
+        elif isinstance(value, str):
+            value = vtype_to_dtype(value)
+        assert isinstance(value, np.dtype) or issubclass(value, np.generic), value
+        if value != getattr(self, '_pixel_format', None):  # only a real change drops the buffers
+            self._pixel_format = value
+            self._host_buffer = None
+            self._device_buffer = None
+    pixel_format = property(_get_pixel_format, _set_pixel_format)
+
+    def _get_buffer_type(self):
+        return self._buffer_type
+    def _set_buffer_type(self, value):  # None selects CUDA_DEVICE
+        if value is None:
+            value = CudaOutputBufferType.CUDA_DEVICE
+        assert isinstance(value, CudaOutputBufferType), type(value)
+        if value != getattr(self, '_buffer_type', None):  # only a real change drops the buffers
+            self._buffer_type = value
+            self._host_buffer = None
+            self._device_buffer = None
+    buffer_type = property(_get_buffer_type, _set_buffer_type)
+
+    def _get_width(self):
+        return self._width
+    def _set_width(self, value):
+        if value is None:
+            value = 1
+        assert value >= 1, value
+        try:
+            value = np.int32(np.asscalar(value))
+        except AttributeError:
+            value = np.int32(value)
+        if value != getattr(self, '_width', None):
+            self._width = value
+            self._host_buffer = None
+            self._device_buffer = None
+    width = property(_get_width, _set_width)
+
+    def _get_height(self):
+        return self._height
+    def _set_height(self, value):
+        if value is None:
+            value = 1
+        assert value >= 1, value
+        try:
+            value = np.int32(np.asscalar(value))
+        except AttributeError:
+            value = np.int32(value)
+        if value != getattr(self, '_height', None):
+            self._height = value
+            self._host_buffer = None
+            self._device_buffer = None
+    height = property(_get_height, _set_height)
+
+    def _get_device_idx(self):
+        return self._device
+    def _set_device_idx(self, value):
+        if value is None:
+            value = 0
+        assert value >= 0, value
+        value = int(value)
+        if value != getattr(self, '_device_idx', None):
+            self._device_idx = value
+            self._device = cp.cuda.Device(value)
+            self._host_buffer = None
+            self._device_buffer = None
+    device_idx = property(_get_device_idx, _set_device_idx)
+
+    def _get_stream(self):
+        return self._stream
+    def _set_stream(self, value):  # None selects cp.cuda.Stream.null
+        if value is None:
+            value = cp.cuda.Stream.null
+        assert isinstance(value, cp.cuda.Stream), type(value)
+        self._stream = value
+    stream = property(_get_stream, _set_stream)
diff --git a/optix/sutil/gl_display.py b/optix/sutil/gl_display.py
new file mode 100644
index 0000000..b00ce38
--- /dev/null
+++ b/optix/sutil/gl_display.py
@@ -0,0 +1,139 @@
+import ctypes
+import numpy as np
+
+import OpenGL.GL as gl
+import OpenGL.GL.shaders
+
+from optix.sutil.cuda_output_buffer import BufferImageFormat
+
+class GLDisplay:
+    vert_source = \
+"""
+#version 330 core
+
+layout(location = 0) in vec3 position;
+out vec2 UV;
+
+void main()
+{
+    gl_Position =  vec4(position, 1);
+    UV = (vec2(position.x, position.y) + vec2(1,1)) / 2.0;
+}
+"""
+
+    frag_source = \
+"""
+#version 330 core
+
+in vec2 UV;
+layout(location=0) out vec4 color;
+
+uniform sampler2D render_tex;
+
+void main()
+{
+    color = texture(render_tex, UV).xyzw;
+}
+"""
+
+    quad_vertex_buffer_data = np.asarray([
+        [-1.0, -1.0, 0.0],
+        [ 1.0, -1.0, 0.0],
+        [-1.0,  1.0, 0.0],
+        [-1.0,  1.0, 0.0],
+        [ 1.0, -1.0, 0.0],
+        [ 1.0,  1.0, 0.0],
+        ], dtype=np.float32)
+
+    __slots__ = ['_image_format', '_render_tex', '_program', '_render_tex_uniforloc',
+            '_quad_vertex_buffer', '_image_format']
+
+    def __init__(self, image_format):
+        assert isinstance(image_format, BufferImageFormat), type(fmt)
+
+        vertex_array = gl.glGenVertexArrays(1)
+        gl.glBindVertexArray(vertex_array)
+
+        program = self.create_gl_program()
+        render_tex_uniforloc = gl.glGetUniformLocation(program, "render_tex")
+
+        render_tex = gl.glGenTextures(1)
+        gl.glBindTexture(gl.GL_TEXTURE_2D, render_tex)
+
+        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
+        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_NEAREST)
+        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
+        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
+
+        quad_vertex_buffer = gl.glGenBuffers(1)
+        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, quad_vertex_buffer)
+        gl.glBufferData(gl.GL_ARRAY_BUFFER,
+            self.quad_vertex_buffer_data,
+            gl.GL_STATIC_DRAW)
+
+        self._image_format = image_format
+        self._program = program
+        self._render_tex = render_tex
+        self._render_tex_uniforloc = render_tex_uniforloc
+        self._quad_vertex_buffer = quad_vertex_buffer
+
+    @classmethod
+    def create_gl_program(cls):  # compiles vert_source and frag_source into one program
+        return gl.shaders.compileProgram(
+            gl.shaders.compileShader(cls.vert_source, gl.GL_VERTEX_SHADER),
+            gl.shaders.compileShader(cls.frag_source, gl.GL_FRAGMENT_SHADER),
+        )
+
+    def display(self, screen_res_x, screen_res_y, framebuf_res_x, framebuf_res_y, pbo):  # draws the contents of pbo as a textured quad
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
+        gl.glViewport(0, 0, framebuf_res_x, framebuf_res_y)
+        gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
+        gl.glUseProgram(self._program)
+        gl.glPolygonMode(gl.GL_FRONT_AND_BACK, gl.GL_FILL)
+
+        gl.glActiveTexture(gl.GL_TEXTURE0)
+        gl.glBindTexture(gl.GL_TEXTURE_2D, self._render_tex)
+        gl.glBindBuffer(gl.GL_PIXEL_UNPACK_BUFFER, pbo)
+
+        itemsize = self._image_format.itemsize
+        if (itemsize % 8 == 0):  # unpack alignment: largest of 8, 4, 2, 1 that divides the pixel size
+            gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 8)
+        elif (itemsize % 4 == 0):
+            gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 4)
+        elif (itemsize % 2 == 0):
+            gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 2)
+        else:
+            gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)
+
+        convert_to_srgb = True  # switched off for UCHAR4 below
+
+        image_format = self._image_format
+        if(image_format == BufferImageFormat.UCHAR4):
+            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA8, screen_res_x, screen_res_y,
+                            0, gl.GL_RGBA, gl.GL_UNSIGNED_BYTE, None)  # None data while pbo is bound as the unpack buffer
+            convert_to_srgb = False
+        elif image_format is BufferImageFormat.FLOAT3:
+            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGB32F, screen_res_x, screen_res_y,
+                            0, gl.GL_RGB, gl.GL_FLOAT, None)
+        elif image_format is BufferImageFormat.FLOAT4:
+            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, screen_res_x, screen_res_y,
+                            0, gl.GL_RGBA, gl.GL_FLOAT, None)
+        else:
+            raise NotImplementedError(f"Unknown image format {image_format}.")
+
+        if convert_to_srgb:
+            gl.glEnable(gl.GL_FRAMEBUFFER_SRGB)
+        else:
+            gl.glDisable(gl.GL_FRAMEBUFFER_SRGB)
+
+        gl.glBindBuffer(gl.GL_PIXEL_UNPACK_BUFFER, 0)
+        gl.glUniform1i(self._render_tex_uniforloc, 0)  # sampler reads texture unit 0
+
+        # 1st attribute buffer : vertices
+        gl.glEnableVertexAttribArray(0)
+        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._quad_vertex_buffer)
+        gl.glVertexAttribPointer(0, 3, gl.GL_FLOAT, gl.GL_FALSE, 0, ctypes.c_void_p(0))
+        gl.glDrawArrays(gl.GL_TRIANGLES, 0, 6)  # six vertices of quad_vertex_buffer_data
+        gl.glDisableVertexAttribArray(0)
+
+        gl.glDisable(gl.GL_FRAMEBUFFER_SRGB)  # always left disabled on exit
diff --git a/optix/sutil/gui.py b/optix/sutil/gui.py
new file mode 100644
index 0000000..c2a22b3
--- /dev/null
+++ b/optix/sutil/gui.py
@@ -0,0 +1,99 @@
+
+import glfw
+import OpenGL.GL as gl
+
+import imgui
+from imgui.integrations.glfw import GlfwRenderer
+
+def static_vars(**kwargs):
+    """
+    Attach static variables to the decorated function as attributes.
+    """
+    def decorate(f):
+        for k in kwargs:
+            setattr(f, k, kwargs[k])  # each keyword becomes an attribute on f
+        return f  # the function itself is returned, not a wrapper
+    return decorate
+
+def init_gl():  # sets the clear colour and clears the colour buffer once
+    gl.glClearColor(0.212, 0.271, 0.31, 1.0)
+    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
+
+def init_imgui(window):
+    imgui.create_context()
+    impl = GlfwRenderer(window)
+    impl.io.fonts.add_font_default()
+    imgui.core.style_colors_dark();
+    imgui.get_style().window_border_size = 0.0
+    return impl
+
+def init_ui(window_title, width, height):
+    if not glfw.init():
+        raise RuntimeError("Could not initialize OpenGL context")
+
+    window = glfw.create_window(int(width), int(height), window_title, None, None)
+    glfw.make_context_current(window)
+
+    if not window:
+        raise RuntimeError("Could not initialize Window")
+
+    glfw.swap_interval(0)
+
+    init_gl()
+    impl = init_imgui(window)
+
+    return window, impl
+
+def display_text(text, x, y):  # draws text in an undecorated, input-less imgui window at (x, y)
+    imgui.set_next_window_bg_alpha(0.0)  # fully transparent background
+    imgui.set_next_window_position(x, y)
+
+    flags = (imgui.WINDOW_NO_TITLE_BAR | imgui.WINDOW_NO_RESIZE | imgui.WINDOW_NO_MOVE |
+             imgui.WINDOW_NO_SCROLLBAR | imgui.WINDOW_NO_SAVED_SETTINGS | imgui.WINDOW_NO_INPUTS)
+
+    imgui.begin("TextOverlayFG", None, flags)
+    imgui.push_style_color(imgui.COLOR_TEXT, 0.7, 0.7, 0.7, 1.0)
+    imgui.text(text)
+    imgui.pop_style_color()
+    imgui.end()
+
+@static_vars(total_subframe_count=0, last_update_frames=0,
+        last_update_time=None, display_text="")
+def display_stats(state_update_time, render_time, display_time):  # draws the stats overlay; returns True when the text was refreshed
+    display_update_min_interval_time = 0.5  # compared against glfw.get_time() differences
+
+    cur_time = glfw.get_time()
+
+    display_stats.last_update_frames += 1
+    last_update_time = display_stats.last_update_time or cur_time - 0.5  # first call: pretend the last refresh was 0.5 ago
+    last_update_frames = display_stats.last_update_frames
+    total_subframe_count = display_stats.total_subframe_count
+
+    dt = cur_time - last_update_time
+
+    do_update = (dt > display_update_min_interval_time) or (total_subframe_count == 0)
+
+    if do_update:
+        fps = last_update_frames / dt
+        state_ms = 1000.0 * state_update_time / last_update_frames  # each timing is divided by the frames since the last refresh
+        render_ms = 1000.0 * render_time / last_update_frames
+        display_ms = 1000.0 * display_time / last_update_frames
+
+        display_stats.last_update_time = cur_time
+        display_stats.last_update_frames = 0
+
+        display_stats.display_text = \
+f"""{fps:5.1f} fps
+
+state update: {state_ms:8.1f} ms
+render      : {render_ms:8.1f} ms
+display     : {display_ms:8.1f} ms
+"""
+
+    imgui.new_frame()
+    display_text(display_stats.display_text, 10.0, 10.0)  # the cached text is drawn every call, refreshed or not
+    imgui.end_frame()
+
+    display_stats.total_subframe_count += 1
+
+    return do_update
diff --git a/optix/sutil/properties.py b/optix/sutil/properties.py
new file mode 100644
index 0000000..e9bead3
--- /dev/null
+++ b/optix/sutil/properties.py
@@ -0,0 +1,58 @@
+import numpy as np
+
+def get_member(varname):  # builds a property getter for the attribute named varname
+
+    def getter(self, varname=varname):
+        return getattr(self, varname, None)  # None when the attribute has not been set yet
+
+    return getter
+
+
+def set_bool(varname, default_value=None):  # builds a property setter that stores bool(value)
+
+    def setter(self, value, varname=varname, default_value=default_value):
+        if value is None:
+            value = default_value  # None means "use the default"
+        value = bool(value)
+        setattr(self, varname, value)
+
+    return setter
+
+
+def set_int(varname, default_value=None):  # builds a property setter that stores np.int32(value)
+
+    def setter(self, value, varname=varname, default_value=default_value):
+        if value is None:
+            value = default_value  # None means "use the default"
+        value = np.int32(value)
+        setattr(self, varname, value)
+
+    return setter
+
+
+def set_float(varname, default_value=None):  # builds a property setter that stores np.float32(value)
+
+    def setter(self, value, varname=varname, default_value=default_value):
+        if value is None:
+            value = default_value  # None means "use the default"
+        value = np.float32(value)
+        setattr(self, varname, value)
+
+    return setter
+
+
+def set_float3(varname, default_value=None):  # builds a property setter that stores a float32 array
+
+    def setter(self, value, varname=varname, default_value=default_value):
+        if value is None:
+            value = default_value  # None means "use the default"
+
+        if value is None:
+            pass  # no default either: None is stored as-is
+        elif np.isscalar(value):
+            value = np.full(shape=(3,), dtype=np.float32, fill_value=value)  # a scalar is repeated into 3 components
+        else:
+            value = np.asarray(value, dtype=np.float32)  # the length is not checked here
+        setattr(self, varname, value)
+
+    return setter
diff --git a/optix/sutil/trackball.py b/optix/sutil/trackball.py
new file mode 100644
index 0000000..8643f27
--- /dev/null
+++ b/optix/sutil/trackball.py
@@ -0,0 +1,199 @@
+import enum
+
+import numpy as np
+
+from optix.sutil.properties import get_member, set_bool, set_float, set_float3
+from optix.sutil.vecmath import dot, length, normalize
+from optix.sutil.camera import Camera
+
+class TrackballViewMode(enum.Enum):  # selects which camera point Trackball._update_camera keeps in place
+    EyeFixed = 0  # look_at is recomputed from eye
+    LookAtFixed = 1  # eye is recomputed from look_at
+
+class Trackball:
+    __slots__ = ['_gimbal_lock','_view_mode', '_camera', '_camera_eye_lookat_distance',
+                 '_zoom_multiplier', '_move_speed', '_roll_speed', '_latitude', '_longitude',
+                 '_previous_position_x', '_previous_position_y', '_perform_tracking',
+                 '_u', '_v', '_w']
+
+    def __init__(self):
+        # initialize all attributes to default values
+        for slot in self.__slots__:
+            setattr(self, slot[1:], None)
+
+    camera_eye_lookat_distance = property(get_member('_camera_eye_lookat_distance'),
+                                          set_float('_camera_eye_lookat_distance', 0.0))
+    zoom_multiplier = property(get_member('_zoom_multiplier'), set_float('_zoom_multiplier', 1.1))
+    move_speed = property(get_member('_move_speed'), set_float('_move_speed', 1.0))
+    roll_speed = property(get_member('_roll_speed'), set_float('_roll_speed', 0.5))
+    latitude = property(get_member('_latitude'), set_float('_latitude', 0.0))
+    longitude = property(get_member('_longitude'), set_float('_longitude', 0.0))
+    previous_position_x = property(get_member('_previous_position_x'), set_float('_previous_position_x', 0))
+    previous_position_y = property(get_member('_previous_position_y'), set_float('_previous_position_y', 0))
+
+    gimbal_lock = property(get_member('_gimbal_lock'), set_bool('_gimbal_lock', False))
+    perform_tracking = property(get_member('_perform_tracking'), set_bool('_perform_tracking', False))
+
+    u = property(get_member("_u"), set_float3("_u", 0.0))
+    v = property(get_member("_v"), set_float3("_v", 0.0))
+    w = property(get_member("_w"), set_float3("_w", 0.0))
+
+    def _get_view_mode(self):
+        return self._view_mode
+    def _set_view_mode(self, view_mode):  # None selects LookAtFixed
+        if view_mode is None:
+            view_mode = TrackballViewMode.LookAtFixed
+        assert isinstance(view_mode, TrackballViewMode), type(view_mode)
+        self._view_mode = view_mode
+    view_mode = property(_get_view_mode, _set_view_mode)
+
+    def _get_camera(self):
+        return self._camera
+    def _set_camera(self, camera):
+        """
+        Set the camera that will be changed according to user input.
+        Warning, this also initializes the reference frame of the trackball from the camera.
+        The reference frame defines the orbit's singularity.
+        """
+        if camera is None:
+            camera = Camera()  # None selects a camera with all default settings
+        assert isinstance(camera, Camera), type(camera)
+        self._camera = camera
+        self.reinitialize_orientation_from_camera()  # resets u, v, w, latitude, longitude and the eye/look_at distance
+    camera = property(_get_camera, _set_camera)
+
+    def start_tracking(self, x, y):  # remembers (x, y) as the previous position and enables tracking
+        self.previous_position_x = x
+        self.previous_position_y = y
+        self.perform_tracking = True
+
+    def update_tracking(self, x, y, canvas_width, canvas_height):  # canvas_width and canvas_height are not used in this body
+        if not self._perform_tracking:
+            return self.start_tracking(x, y)  # first event only records the position
+
+        delta_x = x - self.previous_position_x
+        delta_y = y - self.previous_position_y
+
+        if delta_x == 0 and delta_y == 0:
+            return
+
+        self.previous_position_x = x
+        self.previous_position_y = y
+
+        self.latitude = np.deg2rad(min(+89.0, max(-89.0, np.rad2deg(self.latitude) + 0.5*delta_y)))  # half a degree per unit, clamped to +/-89 degrees
+        self.longitude = np.deg2rad(np.fmod(np.rad2deg(self.longitude) - 0.5*delta_x, 360.0))  # half a degree per unit, wrapped with fmod 360
+
+        self._update_camera()
+
+        if not self.gimbal_lock:
+            self.reinitialize_orientation_from_camera()  # re-base the frame on the moved camera; resets latitude/longitude to 0
+            self.camera.up = self.w
+
+    def wheel_event(self, direction):  # forwards to zoom() and always returns True
+        self.zoom(direction)
+        return True
+
+    def zoom(self, direction):  # direction > 0 divides the eye/look_at distance by zoom_multiplier, otherwise multiplies it
+        zoom = np.float32(1.0/self.zoom_multiplier if direction > 0 else self.zoom_multiplier)
+        self.camera_eye_lookat_distance *= zoom
+
+        look_at = self.camera.look_at
+        eye = self.camera.eye
+        self.camera.eye = look_at + (eye - look_at) * zoom  # eye moves along the look_at -> eye line; look_at stays put
+
+    def reinitialize_orientation_from_camera(self):
+        """
+        Adopts the reference frame from the camera.
+        Note that the reference frame of the camera usually has a different 'up' than the 'up' of the camera.
+        Though, typically, it is desired that the trackball's reference frame aligns with the actual up of the camera.
+        """
+        u, v, w = self.camera.uvw_frame()
+
+        self.u = normalize(+u)
+        self.v = normalize(-w)  # trackball v is the negated camera w
+        self.w = normalize(+v)  # trackball w (its 'up') is the camera v
+
+        self.latitude = 0.0
+        self.longitude = 0.0
+
+        self.camera_eye_lookat_distance = length(self.camera.look_at - self.camera.eye)
+        assert(self.camera_eye_lookat_distance > 0)
+
+    def set_reference_frame(self, u, v, w):
+        """
+        Specify the frame of the orbit that the camera is orbiting around.
+        The important bit is the 'up' of that frame, as this defines the singularity.
+        Here, 'up' is the 'w' component.
+        Typically you want the up of the reference frame to align with the up of the camera.
+        However, to be able to really freely move around, you can also constantly update
+        the reference frame of the trackball. This can be done by calling
+        reinitialize_orientation_from_camera().
+        In most cases it is not required though (set the frame/up once, leave it as is).
+        """
+        self.u = u
+        self.v = v
+        self.w = w
+
+        assert length(self.camera.look_at - self.camera.eye) != 0
+        dir_ws = -normalize(self.camera.look_at - self.camera.eye)  # unit vector from look_at back towards eye
+
+        dirx = dot(dir_ws, u)  # components of dir_ws along the frame axes as passed in
+        diry = dot(dir_ws, v)
+        dirz = dot(dir_ws, w)
+
+        self.longitude = np.arctan2(dirx, diry)
+        self.latitude = np.arcsin(dirz)
+
+
+    def _update_camera(self):  # repositions eye or look_at from latitude, longitude and the stored distance
+        dirx = np.cos(self._latitude)*np.sin(self._longitude)
+        diry = np.cos(self._latitude)*np.cos(self._longitude)
+        dirz = np.sin(self._latitude)
+
+        dir_ws = self.u * dirx + self.v * diry + self.w * dirz  # direction expressed through the u, v, w frame
+
+        if self.view_mode is TrackballViewMode.EyeFixed:
+            eye = self.camera.eye
+            self.camera.look_at = eye - dir_ws * self.camera_eye_lookat_distance
+        elif self.view_mode is TrackballViewMode.LookAtFixed:
+            look_at = self.camera.look_at
+            self.camera.eye = look_at + dir_ws * self.camera_eye_lookat_distance
+        else:
+            raise NotImplementedError(self.view_mode)
+
+    def _move_backward(self, speed):  # shifts eye and look_at by speed against the viewing direction
+        dir_ws = normalize(self.camera.look_at - self.camera.eye)
+        self.camera.eye -= dir_ws * speed
+        self.camera.look_at -= dir_ws * speed
+
+    def _move_forward(self, speed):  # shifts eye and look_at by speed along the viewing direction
+        dir_ws = normalize(self.camera.look_at - self.camera.eye)
+        self.camera.eye += dir_ws * speed
+        self.camera.look_at += dir_ws * speed
+
+    def _move_left(self, speed):  # shifts eye and look_at by speed against the camera's normalized U axis
+        u = normalize( self.camera.uvw_frame()[0] )
+        self.camera.eye -= u*speed
+        self.camera.look_at -= u*speed
+
+    def _move_right(self, speed):  # shifts eye and look_at by speed along the camera's normalized U axis
+        u = normalize( self.camera.uvw_frame()[0] )
+        self.camera.eye += u*speed
+        self.camera.look_at += u*speed
+
+    def _move_down(self, speed):  # shifts eye and look_at by speed against the camera's normalized V axis
+        v = normalize( self.camera.uvw_frame()[1] )
+        self.camera.eye -= v*speed
+        self.camera.look_at -= v*speed
+
+    def _move_up(self, speed):  # shifts eye and look_at by speed along the camera's normalized V axis
+        v = normalize( self.camera.uvw_frame()[1] )
+        self.camera.eye += v*speed
+        self.camera.look_at += v*speed
+
+    def _roll_right(self, speed):  # sets camera.up to the direction at (90 - speed) degrees in the normalized U/V plane
+        u, v, _ = map(normalize, self.camera.uvw_frame())
+        self.camera.up = u*np.cos(np.deg2rad(90.0 - speed)) + v*np.sin(np.deg2rad(90.0 - speed))
+
+    def _roll_left(self, speed):  # sets camera.up to the direction at (90 + speed) degrees in the normalized U/V plane
+        u, v, _ = map(normalize, self.camera.uvw_frame())
+        self.camera.up = u*np.cos(np.deg2rad(90.0 + speed)) + v*np.sin(np.deg2rad(90.0 + speed))
diff --git a/optix/sutil/vecmath.py b/optix/sutil/vecmath.py
new file mode 100644
index 0000000..dc78e65
--- /dev/null
+++ b/optix/sutil/vecmath.py
@@ -0,0 +1,69 @@
+import re
+
+import numpy as np
+
+cross = np.cross
+
+def dot(a, b):  # element-wise product summed over the last axis
+    return (a*b).sum(axis=-1)
+
+def length(x):  # square root of dot(x, x)
+    return np.sqrt(dot(x, x))
+
+def normalize(x):  # returns x divided by its length
+    l = length(x)
+    assert l>0, x  # a zero-length input fails here rather than dividing by zero
+    return x/l
+
+def ctype_to_dtype(ctype):
+    _ctype_to_dtype = {
+        'float': np.float32,
+        'double': np.float64,
+        'char': np.int8,
+        'short': np.int16,
+        'int': np.int32,
+        'longlong': np.int64,
+        'uchar': np.uint8,
+        'ushort': np.uint16,
+        'uint': np.uint32,
+        'ulonglong': np.uint64,
+    }
+    ctype = ctype.replace('long int', 'long')
+    ctype = ctype.replace('long long', 'longlong')
+    ctype = ctype.replace('unsigned ', 'u')
+
+    if ctype not in _ctype_to_dtype:
+        msg = "Cannot determine dtype from ctype '{ctype}'."
+        raise ValueError(msg)
+
+    return _ctype_to_dtype[ctype]
+
+
+def vtype_to_dtype(vtype):
+    regexp = re.compile(r'((?:float|double)|u?(?:char|short|int|longlong))(\d*)')
+
+    match = regexp.match(vtype)
+    if not match:
+        msg = "Cannot extract format from '{pformat}'."
+        raise ValueError(msg)
+
+    dtype = ctype_to_dtype(match.group(1))
+
+    count = match.group(2)
+
+    if (count is None):
+        return dtype
+
+    count = int(count)
+
+    if count == 0:
+        return dtype
+
+    if count <= 4:
+        names = tuple('xyzw'[:count])
+        formats = [dtype,]*count
+        vec_dtype = np.dtype(dict(names=names, formats=formats))
+    else:
+        vec_dtype = np.dtype( (dtype, (count,)) )
+
+    return vec_dtype