diff --git a/.gitignore b/.gitignore index 59cf74c..451461d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,12 @@ .idea/ __pycache__/ build/ +dist/ +imgui.ini *.egg-info/ .*/ *.so *.html *.cpp *.c -.* \ No newline at end of file +.* diff --git a/examples/cuda/dynamic_geometry.cu b/examples/cuda/dynamic_geometry.cu new file mode 100644 index 0000000..4a7936c --- /dev/null +++ b/examples/cuda/dynamic_geometry.cu @@ -0,0 +1,146 @@ +// +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
#include <optix.h>

#include "dynamic_geometry.h"
#include "vec_math.h"
#include "helpers.h"

extern "C" {
    // Launch parameters; the host names this variable "params" in its
    // pipeline compile options.
    __constant__ Params params;
}


// Trace one ray through `handle`.  The three components of *prd are bit-cast
// into payload registers before the trace and read back afterwards, so *prd
// is both the initial payload and the result.
static __forceinline__ __device__ void trace(
    OptixTraversableHandle handle,
    float3                 ray_origin,
    float3                 ray_direction,
    float                  tmin,
    float                  tmax,
    float3*                prd
)
{
    unsigned int p0, p1, p2;
    p0 = float_as_int( prd->x );
    p1 = float_as_int( prd->y );
    p2 = float_as_int( prd->z );
    optixTrace(
        handle,
        ray_origin,
        ray_direction,
        tmin,
        tmax,
        0.0f,                // rayTime
        OptixVisibilityMask( 1 ),
        OPTIX_RAY_FLAG_NONE,
        0,                   // SBT offset
        0,                   // SBT stride
        0,                   // missSBTIndex
        p0, p1, p2 );
    prd->x = int_as_float( p0 );
    prd->y = int_as_float( p1 );
    prd->z = int_as_float( p2 );
}


// Store a float3 in payload registers 0..2 (bit-cast, no conversion).
static __forceinline__ __device__ void setPayload( float3 p )
{
    optixSetPayload_0( float_as_int( p.x ) );
    optixSetPayload_1( float_as_int( p.y ) );
    optixSetPayload_2( float_as_int( p.z ) );
}


// Read payload registers 0..2 back as a float3.
static __forceinline__ __device__ float3 getPayload()
{
    return make_float3(
        int_as_float( optixGetPayload_0() ),
        int_as_float( optixGetPayload_1() ),
        int_as_float( optixGetPayload_2() )
    );
}


// Ray generation: one ray per launch index, written to params.frame_buffer.
extern "C" __global__ void __raygen__rg()
{
    const uint3 idx = optixGetLaunchIndex();
    const uint3 dim = optixGetLaunchDimensions();

    const float3 eye = params.eye;
    const float3 U   = params.U;
    const float3 V   = params.V;
    const float3 W   = params.W;

    // Map the launch index to [-1, 1) in both screen dimensions.
    const float2 d = 2.0f * make_float2(
        static_cast< float >( idx.x ) / static_cast< float >( dim.x ),
        static_cast< float >( idx.y ) / static_cast< float >( dim.y )
        ) - 1.0f;

    const float3 direction   = normalize( d.x * U + d.y * V + W );
    float3       payload_rgb = make_float3( 0.5f, 0.5f, 0.5f );

    trace( params.trav_handle,
           eye,
           direction,
           0.00f,  // tmin
           1e16f,  // tmax
           &payload_rgb );

    params.frame_buffer[idx.y * params.width + idx.x] = make_color( payload_rgb );
}


// Miss: overwrite the payload with the background colour from the SBT record.
// The incoming payload is not needed, so it is no longer read.
extern "C" __global__ void __miss__ms()
{
    MissData* rt_data = reinterpret_cast< MissData* >( optixGetSbtDataPointer() );
    setPayload( make_float3( rt_data->bg_color.x, rt_data->bg_color.y, rt_data->bg_color.z ) );
}


// Closest hit: shade with the geometric normal of the hit triangle, remapped
// to a colour and tinted by the per-record colour.
extern "C" __global__ void __closesthit__ch()
{
    HitGroupData* rt_data = reinterpret_cast< HitGroupData* >( optixGetSbtDataPointer() );

    // fetch current triangle vertices
    float3 data[3];
    optixGetTriangleVertexData( optixGetGASTraversableHandle(), optixGetPrimitiveIndex(), optixGetSbtGASIndex(),
                                optixGetRayTime(), data );

    // compute triangle normal as the cross product of two edges
    data[1] -= data[0];
    data[2] -= data[0];
    float3 normal = make_float3(
        data[1].y*data[2].z - data[1].z*data[2].y,
        data[1].z*data[2].x - data[1].x*data[2].z,
        data[1].x*data[2].y - data[1].y*data[2].x );

    // s normalizes the normal and halves it in one step, so normal*s + 0.5
    // has every component in [0, 1].
    const float s = 0.5f / sqrtf( normal.x*normal.x + normal.y*normal.y + normal.z*normal.z );

    // convert normal to color and store in payload
    setPayload( (normal*s + make_float3( 0.5f )) * rt_data->color );
}
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma once

// Launch parameters shared between host and device code.
// NOTE(review): the Python host in this change fills a record with the same
// fields in the same order (frame_buffer, width, height, eye, u, v, w,
// trav_handle, subframe_index); keep the two in sync.
struct Params
{
    uchar4*                frame_buffer;
    unsigned int           width;
    unsigned int           height;
    float3                 eye, U, V, W;
    OptixTraversableHandle trav_handle;
    int                    subframe_index;
};

// Camera data for a ray-generation SBT record.
// NOTE(review): the device programs in dynamic_geometry.cu read the camera
// from Params and do not reference this struct; confirm whether it is needed.
struct RayGenData
{
    float3 cam_eye;
    float3 camera_u, camera_v, camera_w;
};


// SBT payload of the miss record: background colour.
struct MissData
{
    float4 bg_color;
};


// SBT payload of each hit-group record: colour multiplied into the shading.
struct HitGroupData
{
    float3 color;
};
#include "vec_math.h"


// Animation applied while generating vertices.  The host passes the mode as a
// 32-bit integer, so the numeric values must match the host-side enum.
enum struct AnimationMode: int
{
    NONE = 0,
    DEFORM = 1,
    EXPLODE = 2,
};


// Triangle wave of x with the given phase shift, period and amplitude.
// For (x - shift) >= 0 the result lies in [-amplitude, amplitude].
// NOTE(review): fmodf keeps the sign of its first argument, so for
// (x - shift) < 0 the result lies in [amplitude, 5 * amplitude) instead,
// e.g. triangle_wave( 0, M_PIf / 2 ) == 2.  Confirm this is intended.
__forceinline__ __device__ float triangle_wave( float x, float shift = 0.f, float period = 2.f * M_PIf, float amplitude = 1.f )
{
    return fabsf( fmodf( ( 4.f / period ) * ( x - shift ), 4.f * amplitude ) - 2.f * amplitude ) - amplitude;
}

// Write triangle `tidx` (three consecutive vertices starting at
// out_vertices[tidx * 3]).  In EXPLODE mode all three vertices are translated
// by one displacement vector derived from tidx and time; in every other mode
// the vertices are written unchanged.
__forceinline__ __device__ void write_animated_triangle( float3* out_vertices, int tidx, float3 v0, float3 v1, float3 v2, AnimationMode mode, float time )
{
    float3 v = make_float3( 0 );

    if( mode == AnimationMode::EXPLODE )
    {
        // Generate displacement vector from triangle index
        const float theta = ( (float)M_PIf * ( ( tidx + 1 ) * ( 13 / M_PIf ) ) );
        const float phi   = ( (float)( 2.0 * M_PIf ) * ( ( tidx + 1 ) * ( 97 / M_PIf ) ) );

        // Apply displacement to the sphere triangles
        v = make_float3( triangle_wave( phi ) * triangle_wave( theta, M_PIf / 2.f ),
                         triangle_wave( phi, M_PIf / 2.f ) * triangle_wave( theta, M_PIf / 2.f ), triangle_wave( theta ) )
            * triangle_wave( time, M_PIf / 2.f ) * 2.f;
    }

    out_vertices[tidx * 3 + 0] = v0 + v;
    out_vertices[tidx * 3 + 1] = v1 + v;
    out_vertices[tidx * 3 + 2] = v2 + v;
}

// Return vertex c, with its y coordinate rescaled in DEFORM mode; any other
// mode returns c unchanged.
__forceinline__ __device__ float3 deform_vertex( const float3& c, AnimationMode mode, float time )
{
    // Scale the y coordinate by a cosine wave in (x + time); the factor stays
    // within [0.1, 0.9].
    if( mode == AnimationMode::DEFORM )
        return make_float3( c.x, c.y * ( 0.5f + 0.4f * cosf( 4 * ( c.x + time ) ) ), c.z );
    return c;
}

// Kernel: each thread with idx < width * height generates one patch of a
// width x height tessellated unit sphere and writes it as two un-indexed
// triangles, i.e. six float3 vertices starting at out_vertices[idx * 6].
// out_vertices must therefore hold at least width * height * 6 vertices.
extern "C" __global__ void generate_vertices(float3* out_vertices, AnimationMode mode, float time, int width, int height)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the last patch do nothing.
    if( idx < width * height )
    {
        // generate a single patch (two unindexed triangles) of a tessellated sphere

        int x = idx % width;
        int y = idx / width;

        // theta spans [0, pi] over the rows, phi spans [0, 2*pi] over the columns
        const float theta0 = ( ( float )M_PIf * ( y + 0 ) ) / height;
        const float theta1 = ( ( float )M_PIf * ( y + 1 ) ) / height;
        const float phi0   = ( ( float )( 2.0 * M_PIf ) * ( x + 0 ) ) / width;
        const float phi1   = ( ( float )( 2.0 * M_PIf ) * ( x + 1 ) ) / width;

        const float ct0 = cosf( theta0 );
        const float st0 = sinf( theta0 );
        const float ct1 = cosf( theta1 );
        const float st1 = sinf( theta1 );

        const float cp0 = cosf( phi0 );
        const float sp0 = sinf( phi0 );
        const float cp1 = cosf( phi1 );
        const float sp1 = sinf( phi1 );

        // The four corners of the patch; vAB uses phi index A and theta index B.
        const float3 v00 = deform_vertex( make_float3( cp0 * st0, sp0 * st0, ct0 ), mode, time );
        const float3 v10 = deform_vertex( make_float3( cp0 * st1, sp0 * st1, ct1 ), mode, time );
        const float3 v01 = deform_vertex( make_float3( cp1 * st0, sp1 * st0, ct0 ), mode, time );
        const float3 v11 = deform_vertex( make_float3( cp1 * st1, sp1 * st1, ct1 ), mode, time );

        write_animated_triangle( out_vertices, idx * 2 + 0, v00, v10, v11, mode, time );
        write_animated_triangle( out_vertices, idx * 2 + 1, v00, v11, v01, mode, time );
    }
}
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <optix.h>

#include "dynamic_materials.h"
#include "helpers.h"
#include "vec_math.h"

// Launch parameters, filled in by the host before each launch.
extern "C" __constant__ Params params;

// Trace one ray through `handle`.  *prd is bit-cast into three payload
// registers before the trace and read back afterwards.
static __forceinline__ __device__ void trace( OptixTraversableHandle handle, float3 ray_origin, float3 ray_direction, float tmin, float tmax, float3* prd )
{
    unsigned int p0, p1, p2;
    p0 = float_as_int( prd->x );
    p1 = float_as_int( prd->y );
    p2 = float_as_int( prd->z );
    optixTrace( handle, ray_origin, ray_direction, tmin, tmax,
                0.0f,  // rayTime
                OptixVisibilityMask( 1 ), OPTIX_RAY_FLAG_NONE,
                0,     // SBT offset
                0,     // SBT stride
                0,     // missSBTIndex
                p0, p1, p2 );
    prd->x = int_as_float( p0 );
    prd->y = int_as_float( p1 );
    prd->z = int_as_float( p2 );
}


// Store a float3 in payload registers 0..2 (bit-cast, no conversion).
static __forceinline__ __device__ void setPayload( float3 p )
{
    optixSetPayload_0( float_as_int( p.x ) );
    optixSetPayload_1( float_as_int( p.y ) );
    optixSetPayload_2( float_as_int( p.z ) );
}


// Read payload registers 0..2 back as a float3.
static __forceinline__ __device__ float3 getPayload()
{
    return make_float3( int_as_float( optixGetPayload_0() ),
                        int_as_float( optixGetPayload_1() ),
                        int_as_float( optixGetPayload_2() ) );
}


// Read the object-space shading normal that __intersection__is reported in
// attribute registers 0..2.
static __forceinline__ __device__ float3 getShadingNormal()
{
    return make_float3( int_as_float( optixGetAttribute_0() ),
                        int_as_float( optixGetAttribute_1() ),
                        int_as_float( optixGetAttribute_2() ) );
}


// Ray generation: one ray per launch index, written to params.image.
extern "C" __global__ void __raygen__rg()
{
    const uint3 idx = optixGetLaunchIndex();
    const uint3 dim = optixGetLaunchDimensions();

    const float3 U = params.camera_u;
    const float3 V = params.camera_v;
    const float3 W = params.camera_w;

    // Map the launch index to [-1, 1) in both screen dimensions.
    const float2 d = 2.0f * make_float2( static_cast<float>( idx.x ) / static_cast<float>( dim.x ),
                                         static_cast<float>( idx.y ) / static_cast<float>( dim.y ) ) - 1.0f;

    const float3 origin      = params.cam_eye;
    const float3 direction   = normalize( d.x * U + d.y * V + W );
    float3       payload_rgb = make_float3( 0.5f, 0.5f, 0.5f );
    trace( params.trav_handle, origin, direction,
           0.00f,  // tmin
           1e16f,  // tmax
           &payload_rgb );

    params.image[idx.y * params.image_width + idx.x] = make_color( payload_rgb );
}


// Miss: overwrite the payload with the colour stored in the miss record.
// The incoming payload is not needed, so it is no longer read.
extern "C" __global__ void __miss__ms()
{
    MissData* missData = reinterpret_cast<MissData*>( optixGetSbtDataPointer() );
    setPayload( missData->color );
}


// Intersection: ray versus a sphere of radius params.radius centred at the
// object-space origin.  Reports only the nearer root, with the unnormalized
// outward normal divided by the radius as three attributes.
// NOTE(review): root1 is measured along the normalized direction D but is
// reported as the hit t; the two agree only when the object-space ray
// direction has unit length -- confirm no instance transform scales it.
extern "C" __global__ void __intersection__is()
{
    const float3 orig = optixGetObjectRayOrigin();
    const float3 dir  = optixGetObjectRayDirection();

    const float3 center = {0.f, 0.f, 0.f};
    const float3 O      = orig - center;
    const float  l      = 1 / length( dir );
    const float3 D      = dir * l;

    // Solve |O + t*D|^2 = r^2 with |D| = 1:  t^2 + 2*b*t + c = 0.
    const float b    = dot( O, D );
    const float c    = dot( O, O ) - params.radius * params.radius;
    const float disc = b * b - c;
    if( disc > 0.0f )
    {
        const float sdisc = sqrtf( disc );
        const float root1 = ( -b - sdisc );

        // root11 is a zero correction term kept from the original code.
        const float  root11         = 0.0f;
        const float3 shading_normal = ( O + ( root1 + root11 ) * D ) / params.radius;
        unsigned int p0, p1, p2;
        p0 = float_as_int( shading_normal.x );
        p1 = float_as_int( shading_normal.y );
        p2 = float_as_int( shading_normal.z );

        optixReportIntersection( root1,  // t hit
                                 0,      // user hit kind
                                 p0, p1, p2 );
    }
}


// Shared closest-hit body: world-space normal remapped to [0, 1] per
// component, then multiplied by baseColor.
__forceinline__ __device__ void closesthit_impl( float3 baseColor )
{
    float3 normals = normalize( optixTransformNormalFromObjectToWorldSpace( getShadingNormal() ) ) * 0.5f + 0.5f;

    // add material effects
    normals *= baseColor;
    setPayload( normals );
}

// Closest hit that tints by the colour stored in the hit-group record.
extern "C" __global__ void __closesthit__ch()
{
    HitGroupData* hgData = reinterpret_cast<HitGroupData*>( optixGetSbtDataPointer() );
    closesthit_impl( hgData->color );
}

// Closest hit that outputs the remapped normal with no tint.
extern "C" __global__ void __closesthit__normal()
{
    float3 normals = normalize( optixTransformNormalFromObjectToWorldSpace( getShadingNormal() ) ) * 0.5f + 0.5f;
    setPayload( normals );
}

// Closest hit with a fixed red tint.
extern "C" __global__ void __closesthit__red()
{
    closesthit_impl( make_float3( 1.f, 0.f, 0.f ) );
}

// Closest hit with a fixed green tint.
extern "C" __global__ void __closesthit__green()
{
    closesthit_impl( make_float3( 0.f, 1.f, 0.f ) );
}

// Closest hit with a fixed blue tint.
extern "C" __global__ void __closesthit__blue()
{
    closesthit_impl( make_float3( 0.f, 0.f, 1.f ) );
}
class Params:
    """Attribute-style wrapper around the launch-parameter record.

    Every name listed in ``_params`` is stored in ``self.handle`` (indexed by
    field name) rather than in the instance ``__dict__``; ``handle`` itself is
    the only real instance attribute.
    """

    # Field name -> format string, in the order passed to the record.
    _params = collections.OrderedDict([
        ('frame_buffer', 'u8'),
        ('width', 'u4'),
        ('height', 'u4'),
        ('eye', '3f4'),
        ('u', '3f4'),
        ('v', '3f4'),
        ('w', '3f4'),
        ('trav_handle', 'u8'),
        ('subframe_index', 'i4'),
    ])

    def __init__(self):
        self.handle = ox.LaunchParamsRecord(names=tuple(self._params),
                                            formats=tuple(self._params.values()))

    def __getattr__(self, name):
        """Resolve launch-parameter names through ``handle``.

        ``__getattr__`` only runs after normal lookup has failed, so ordinary
        attributes are not intercepted.  A missing ``handle`` (instance not
        initialised yet) is reported as ``AttributeError`` rather than
        ``KeyError`` so that ``hasattr``/``getattr`` with a default work.
        """
        if name in Params._params:
            try:
                handle = self.__dict__['handle']
            except KeyError:
                raise AttributeError(name) from None
            return handle[name]
        raise AttributeError(name)

    def __setattr__(self, name, value):
        """Write launch parameters into ``handle``; reject unknown names."""
        if name in Params._params:
            self.handle[name] = value
        elif name == 'handle':
            super().__setattr__(name, value)
        else:
            raise AttributeError(name)

    def __str__(self):
        return '\n'.join(f'{k}: {self.handle[k]}' for k in self._params)


class DynamicGeometryState:
    """All mutable state of the example: OptiX objects, scene and UI flags."""

    __slots__ = ['params', 'time', 'ctx', 'module', 'pipeline', 'pipeline_opts',
                 'raygen_grp', 'miss_grp', 'hit_grp', 'sbt',
                 'generate_vertices_kernel', 'd_temp_vertices', 'last_exploding_sphere_rebuild_time',
                 'gas_build_input', 'static_gas', 'deforming_gas', 'exploding_gas',
                 'ias_build_input', 'ias',
                 'trackball', 'camera_changed', 'mouse_button', 'resize_dirty', 'minimized']

    def __init__(self):
        # Give every slot a defined value so later reads never raise.
        for slot in self.__slots__:
            setattr(self, slot, None)
        self.params = Params()

        self.trackball = Trackball()
        self.camera_changed = True
        self.mouse_button = -1
        self.resize_dirty = False
        self.minimized = False

    @property
    def camera(self):
        """The camera owned by the trackball."""
        return self.trackball.camera

    @property
    def launch_dimensions(self):
        """Launch size as a ``(width, height)`` tuple of Python ints."""
        return (int(self.params.width), int(self.params.height))


class AnimationMode(enum.Enum):
    """Animation selector passed to the vertex-generation kernel by value."""
    NONE = 0
    DEFORM = 1
    EXPLODE = 2


#------------------------------------------------------------------------------
# Scene data
#------------------------------------------------------------------------------
g_tessellation_resolution = 128
g_exploding_gas_rebuild_frequency = 10.0

# One diffuse colour per instance.
g_diffuse_colors = np.asarray([
    [0.7, 0.7, 0.7],
    [0.8, 0.8, 0.8],
    [0.9, 0.9, 0.9],
    [1.0, 1.0, 1.0],
], dtype=np.float32)

INST_COUNT = g_diffuse_colors.shape[0]

# One 3x4 row-major transform per instance; they differ only in x translation.
g_instances = np.asarray([
    [1, 0, 0, -4.5,
     0, 1, 0, 0,
     0, 0, 1, 0],
    [1, 0, 0, -1.5,
     0, 1, 0, 0,
     0, 0, 1, 0],
    [1, 0, 0, 1.5,
     0, 1, 0, 0,
     0, 0, 1, 0],
    [1, 0, 0, 4.5,
     0, 1, 0, 0,
     0, 0, 1, 0],
], dtype=np.float32).reshape(INST_COUNT, 3, 4)
#------------------------------------------------------------------------------
# GLFW callbacks
#------------------------------------------------------------------------------
def mouse_button_callback(window, button, action, mods):
    """Start trackball tracking on press; clear the active button otherwise."""
    state = glfw.get_window_user_pointer(window)
    x, y = glfw.get_cursor_pos(window)
    # GLFW constants are plain integers: compare by value, not identity.
    if action == glfw.PRESS:
        state.mouse_button = button
        state.trackball.start_tracking(x, y)
    else:
        state.mouse_button = -1


def cursor_position_callback(window, x, y):
    """Update the trackball while the left or right mouse button is held."""
    state = glfw.get_window_user_pointer(window)
    if state.mouse_button == glfw.MOUSE_BUTTON_LEFT:
        view_mode = TrackballViewMode.LookAtFixed
    elif state.mouse_button == glfw.MOUSE_BUTTON_RIGHT:
        view_mode = TrackballViewMode.EyeFixed
    else:
        # No tracked button is held: nothing to update.
        return

    state.trackball.view_mode = view_mode
    state.trackball.update_tracking(x, y, state.params.width, state.params.height)
    state.camera_changed = True


def window_size_callback(window, res_x, res_y):
    """Record the new window size unless the window is minimized."""
    state = glfw.get_window_user_pointer(window)
    if state.minimized:
        return

    # Clamp to at least one pixel in each dimension.
    state.params.width = max(res_x, 1)
    state.params.height = max(res_y, 1)
    state.camera_changed = True
    state.resize_dirty = True


def window_iconify_callback(window, iconified):
    """Track whether the window is currently minimized."""
    state = glfw.get_window_user_pointer(window)
    state.minimized = (iconified > 0)


def key_callback(window, key, scancode, action, mods):
    """Request window close when Q or Escape is pressed."""
    if action == glfw.PRESS and key in {glfw.KEY_Q, glfw.KEY_ESCAPE}:
        glfw.set_window_should_close(window, True)


def scroll_callback(window, xscroll, yscroll):
    """Forward vertical scroll to the trackball; flag the camera if it moved."""
    state = glfw.get_window_user_pointer(window)
    if state.trackball.wheel_event(yscroll):
        state.camera_changed = True
def init_launch_params(state):
    """Reset the launch parameters that are not derived from other state."""
    state.params.frame_buffer = 0
    state.params.subframe_index = 0


def handle_camera_update(state):
    """Copy the camera eye and UVW frame into the launch params if it changed."""
    if not state.camera_changed:
        return
    state.camera_changed = False

    camera = state.camera
    params = state.params

    camera.aspect_ratio = params.width / float(params.height)
    params.eye = camera.eye

    u, v, w = camera.uvw_frame()
    params.u = u
    params.v = v
    params.w = w


def handle_resize(output_buffer, state):
    """Resize the output buffer after a window-size change."""
    if not state.resize_dirty:
        return
    state.resize_dirty = False

    output_buffer.resize(state.params.width, state.params.height)


def update_state(output_buffer, state):
    """Apply pending camera and resize updates."""
    handle_camera_update(state)
    handle_resize(output_buffer, state)


def launch_subframe(output_buffer, state):
    """Map the output buffer, launch the pipeline into it, then unmap it."""
    state.params.frame_buffer = output_buffer.map()
    try:
        state.pipeline.launch(state.sbt, dimensions=state.launch_dimensions,
                              params=state.params.handle, stream=output_buffer.stream)
    finally:
        # Never leave the buffer mapped, even when the launch raises.
        output_buffer.unmap()


def display_subframe(output_buffer, gl_display, window):
    """Draw the output buffer's PBO into the window's framebuffer."""
    framebuf_res_x, framebuf_res_y = glfw.get_framebuffer_size(window)
    gl_display.display(output_buffer.width, output_buffer.height,
                       framebuf_res_x, framebuf_res_y,
                       output_buffer.get_pbo())


def init_camera_state(state):
    """Set the initial camera pose and align the trackball with it."""
    camera = state.camera
    camera.eye = (0, 1, -20)
    camera.look_at = (0, 0, 0)
    camera.up = (0, 1, 0)
    camera.fov_y = 35
    # The original assigned a dead local here; the flag lives on the state.
    state.camera_changed = True

    trackball = state.trackball
    trackball.move_speed = 10.0
    trackball.set_reference_frame([1, 0, 0], [0, 0, 1], [0, 1, 0])
    trackball.reinitialize_orientation_from_camera()


def create_context(state):
    """Create the device context with logging routed to the module logger."""
    logger = ox.Logger(log)
    ctx = ox.DeviceContext(validation_mode=False, log_callback_function=logger, log_callback_level=4)
    ctx.cache_enabled = False
    state.ctx = ctx


def generate_animated_vertices(out_vertices, animation_mode, time, width, height, kernel=None):
    """Launch the vertex-generation kernel over ``width * height`` patches.

    Args:
        out_vertices: device buffer receiving the vertices.
        animation_mode: enum member whose ``.value`` is passed as a 32-bit int.
        time: animation time, passed as a 32-bit float.
        width, height: tessellation resolution, passed as 32-bit ints.
        kernel: callable accepting ``grid=``, ``block=`` and ``args=``.  When
            omitted, falls back to ``state.generate_vertices_kernel`` on the
            module-level ``state``, which is what the original code always
            used (and which only exists when the file runs as a script).
    """
    if kernel is None:
        kernel = state.generate_vertices_kernel

    threads_per_block = 128
    # Ceiling division: enough blocks for one thread per patch.
    num_blocks = (width * height + threads_per_block - 1) // threads_per_block

    args = (out_vertices, np.int32(animation_mode.value), np.float32(time),
            np.int32(width), np.int32(height))

    kernel(grid=(num_blocks, 1, 1), block=(threads_per_block, 1, 1), args=args)


def launch_generate_animated_vertices(state, animation_mode):
    """Regenerate the shared temporary vertex buffer for ``animation_mode``."""
    generate_animated_vertices(state.d_temp_vertices, animation_mode, state.time,
                               g_tessellation_resolution, g_tessellation_resolution,
                               kernel=state.generate_vertices_kernel)


def update_mesh_accel(state):
    """Advance the per-frame animation of the spheres and update the IAS."""
    # first sphere is static

    # second sphere moves by updating its transform matrix
    transform = state.ias_build_input.get_transform_view(1)
    transform[1, -1] = np.sin(4 * state.time)

    # third sphere deforms
    launch_generate_animated_vertices(state, AnimationMode.DEFORM)
    state.deforming_gas.update(state.gas_build_input)

    # fourth sphere explodes
    launch_generate_animated_vertices(state, AnimationMode.EXPLODE)

    # we occasionally rebuild the exploding sphere to maintain AS quality
    if state.time - state.last_exploding_sphere_rebuild_time > 1 / g_exploding_gas_rebuild_frequency:
        state.last_exploding_sphere_rebuild_time = state.time
        state.exploding_gas = ox.AccelerationStructure(state.ctx, state.gas_build_input,
                                                       compact=True, allow_update=True, random_vertex_access=True)
        state.ias_build_input.instances[3].update_traversable(state.exploding_gas)
        state.ias_build_input.update_instance(3)
    else:
        state.exploding_gas.update(state.gas_build_input)

    state.ias.update(state.ias_build_input)


def build_vertex_generation_kernel(state):
    """Compile the ``generate_vertices`` kernel from the example's CUDA source."""
    cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry_vertex_generation.cu')
    example_include_path = os.path.dirname(cuda_source)

    build_flags = ox.module.get_default_nvrtc_compile_flags() + (f'-I{example_include_path}',)

    with open(cuda_source, 'r') as f:
        code = f.read()

    state.generate_vertices_kernel = cp.RawKernel(code=code, backend='nvrtc',
                                                  options=build_flags, name='generate_vertices')
def create_module(state):
    """Compile the device programs and remember the pipeline compile options."""
    # The original DEBUG branch ended in a trailing comma, which turned the
    # flags into a 1-tuple instead of an ExceptionFlags value.
    if DEBUG:
        exception_flags = (ox.ExceptionFlags.DEBUG
                           | ox.ExceptionFlags.TRACE_DEPTH
                           | ox.ExceptionFlags.STACK_OVERFLOW)
    else:
        exception_flags = ox.ExceptionFlags.NONE

    pipeline_opts = ox.PipelineCompileOptions(
        uses_motion_blur=False,
        uses_primitive_type_flags=ox.PrimitiveTypeFlags.TRIANGLE,
        traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_LEVEL_INSTANCING,
        exception_flags=exception_flags,
        num_payload_values=3,
        num_attribute_values=2,
        pipeline_launch_params_variable_name="params")

    compile_opts = ox.ModuleCompileOptions(
        max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT,
        opt_level=ox.CompileOptimizationLevel.DEFAULT,
        debug_level=ox.CompileDebugLevel.LINEINFO)

    cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry.cu')
    state.module = ox.Module(state.ctx, cuda_source, compile_opts, pipeline_opts)
    # create_pipeline reads the same options back from the state.
    state.pipeline_opts = pipeline_opts


def create_program_groups(state):
    """Create the raygen, miss and hit program groups from the module."""
    ctx, module = state.ctx, state.module

    state.raygen_grp = ox.ProgramGroup.create_raygen(ctx, module, "__raygen__rg")
    state.miss_grp = ox.ProgramGroup.create_miss(ctx, module, "__miss__ms")
    state.hit_grp = ox.ProgramGroup.create_hitgroup(ctx, module, entry_function_CH="__closesthit__ch")


def create_pipeline(state):
    """Link the program groups into a pipeline and size its stacks."""
    program_grps = [state.raygen_grp, state.miss_grp, state.hit_grp]

    link_opts = ox.PipelineLinkOptions(max_trace_depth=1,
                                       debug_level=ox.CompileDebugLevel.LINEINFO)

    pipeline = ox.Pipeline(state.ctx,
                           compile_options=state.pipeline_opts,
                           link_options=link_opts,
                           program_groups=program_grps,
                           max_traversable_graph_depth=2)

    pipeline.compute_stack_sizes(1,  # max_trace_depth
                                 0,  # max_cc_depth
                                 0)  # max_dc_depth

    state.pipeline = pipeline
g_diffuse_colors[i] + + state.sbt = ox.ShaderBindingTable(raygen_record=raygen_sbt, miss_records=miss_sbt, + hitgroup_records=hit_sbts) + +#------------------------------------------------------------------------------ +# Main +#------------------------------------------------------------------------------ +if __name__ == '__main__': + state = DynamicGeometryState() + state.params.width = 1024 + state.params.height = 768 + state.time = 0.0 + + num_frames = 16 + animation_time = 1.0 + + buffer_format = BufferImageFormat.UCHAR4 + output_buffer_type = CudaOutputBufferType.CUDA_DEVICE + + init_camera_state(state) + create_context(state) + create_module(state) + create_program_groups(state) + create_pipeline(state) + create_sbt(state) + init_launch_params(state) + build_mesh_accel(state) + + window, impl = init_ui("optixDynamicGeometry", state.params.width, state.params.height) + glfw.set_mouse_button_callback(window, mouse_button_callback) + glfw.set_cursor_pos_callback(window, cursor_position_callback) + glfw.set_window_size_callback(window, window_size_callback) + glfw.set_window_iconify_callback(window, window_iconify_callback) + glfw.set_key_callback(window, key_callback) + glfw.set_scroll_callback(window, scroll_callback) + glfw.set_window_user_pointer(window, state) + + output_buffer = CudaOutputBuffer(output_buffer_type, buffer_format, + state.params.width, state.params.height) + + gl_display = GLDisplay(buffer_format) + + state_update_time = 0.0 + render_time = 0.0 + display_time = 0.0 + + tstart = glfw.get_time() + + state.last_exploding_sphere_rebuild_time = 0.0 + + while not glfw.window_should_close(window): + t0 = glfw.get_time() + glfw.poll_events() + + state.time = glfw.get_time() - tstart + + update_mesh_accel(state) + + update_state(output_buffer, state) + + t1 = glfw.get_time() + state_update_time += t1 - t0 + t0 = t1 + + launch_subframe(output_buffer, state) + t1 = glfw.get_time() + render_time += t1 - t0 + t0 = t1 + + display_subframe(output_buffer, 
gl_display, window) + display_time += t1 - t0 + + if display_stats(state_update_time, render_time, display_time): + state_update_time = 0.0 + render_time = 0.0 + display_time = 0.0 + + imgui.render() + impl.render(imgui.get_draw_data()) + + glfw.swap_buffers(window) + + state.params.subframe_index = state.params.subframe_index.item() + 1 + + impl.shutdown() + glfw.terminate() diff --git a/examples/dynamic_materials.py b/examples/dynamic_materials.py new file mode 100644 index 0000000..cf45988 --- /dev/null +++ b/examples/dynamic_materials.py @@ -0,0 +1,399 @@ +import os, sys, logging, collections + +import cupy as cp +import numpy as np +import optix as ox + +import glfw, imgui + +from optix.sutil.gui import init_ui, display_text +from optix.sutil.camera import Camera +from optix.sutil.gl_display import GLDisplay +from optix.sutil.cuda_output_buffer import CudaOutputBuffer, CudaOutputBufferType, BufferImageFormat + +script_dir = os.path.dirname(os.path.abspath(__file__)) + +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +log = logging.getLogger() + +DEBUG=False + + +class Params: + _params = collections.OrderedDict([ + ('trav_handle', 'u8'), + ('image', 'u8'), + ('image_width', 'u4'), + ('image_height', 'u4'), + ('radius', 'f4'), + ('cam_eye', '3f4'), + ('camera_u', '3f4'), + ('camera_v', '3f4'), + ('camera_w', '3f4'), + ]) + + def __init__(self): + self.handle = ox.LaunchParamsRecord(names=tuple(self._params.keys()), + formats=tuple(self._params.values())) + + def __getattribute__(self, name): + if name in Params._params.keys(): + return self.__dict__['handle'][name] + else: + return super().__getattribute__(name) + + def __setattr__(self, name, value): + if name in Params._params.keys(): + self.handle[name] = value + elif name in {'handle'}: + super().__setattr__(name, value) + else: + raise AttributeError(name) + + def __str__(self): + return '\n'.join(f'{k}: {self.handle[k]}' for k in self._params) + + +class MaterialIndex: + def __init__(self, 
max_index): + self._index = 0 + self._max_index = max_index + + def _get_index(self): + return self._index + def _set_index(self, value): + assert value >= 0, value + self._index = int(value % self._max_index) + index = property(_get_index, _set_index) + + def nextval(self): + self.index = self.index + 1 + return self.index + + +class SampleState: + __slots__ = ['params', 'ctx', 'gas', 'ias', 'module', + 'raygen_grp', 'miss_grp', 'hit_grps', + 'raygen_sbt', 'miss_sbt', 'hit_sbts', + 'sbt', 'pipeline', 'pipeline_opts', + 'material_index_0', 'material_index_1', 'material_index_2', + 'has_data_changed', 'has_offset_changed', 'has_sbt_changed'] + + def __init__(self, width, height): + for slot in self.__slots__: + setattr(self, slot, None) + + self.params = Params() + self.params.image_width = width + self.params.image_height = height + + self.material_index_0 = MaterialIndex(3) + self.material_index_1 = MaterialIndex(2) + self.material_index_2 = MaterialIndex(3) + self.has_data_changed = False + self.has_offset_changed = False + self.has_sbt_changed = False + + @property + def launch_dimensions(self): + return (int(self.params.image_width), int(self.params.image_height)) + + +def key_callback(window, key, scancode, action, mods): + state = glfw.get_window_user_pointer(window) + if action == glfw.PRESS: + if key in {glfw.KEY_Q, glfw.KEY_ESCAPE}: + glfw.set_window_should_close(window, True) + elif key == glfw.KEY_LEFT: + state.has_data_changed = True + elif key == glfw.KEY_RIGHT: + state.has_sbt_changed = True + elif key == glfw.KEY_UP: + state.has_offset_changed = True + + +# Transforms for instances - one on the left (sphere 0), one in the center and one on the right (sphere 2). +transforms = np.asarray([ + [1, 0, 0, -6, + 0, 1, 0, 0, + 0, 0, 1, -10], + [1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 1, -10], + [1, 0, 0, 6, + 0, 1, 0, 0, + 0, 0, 1, -10], +], dtype=np.float32).reshape(3,3,4) + +# Offsets into SBT for each instance. Hence this needs to be in sync with transforms! 
+# The middle sphere has two SBT records, the two other instances have one each. +sbt_offsets = np.asarray([0, 1, 3], dtype=np.uint32) + +g_colors = np.asarray([[1, 0, 0], + [0, 1, 0], + [0, 0, 1]], dtype=np.float32) + +##------------------------------------------------------------------------------ +## +## Helper Functions +## +##------------------------------------------------------------------------------ + +def init_camera(state): + camera = Camera() + camera.eye = (0, 0, 3) + camera.look_at = (0, 0, 0) + camera.up = (0, 1, 0) + camera.fov_y = 60 + camera.aspect_ratio = state.params.image_width / state.params.image_height + + u,v,w = camera.uvw_frame() + state.params.camera_u = u + state.params.camera_v = v + state.params.camera_w = w + state.params.cam_eye = camera.eye + +def create_context(state): + logger = ox.Logger(log) + ctx = ox.DeviceContext(validation_mode=True, log_callback_function=logger, log_callback_level=4) + ctx.cache_enabled = False + state.ctx = ctx + +def build_gas(state): + aabb = cp.asarray([[-1.5, -1.5, -1.5, 1.5, 1.5, 1.5]], dtype=np.float32) + build_input = ox.BuildInputCustomPrimitiveArray([aabb], num_sbt_records=1, flags=[ox.GeometryFlags.NONE]) + state.gas = ox.AccelerationStructure(state.ctx, [build_input], compact=True) + state.params.radius = 1.5 + +def build_ias(state): + instances = [] + for i in range(transforms.shape[0]): + instance = ox.Instance(traversable=state.gas, instance_id=0, + sbt_offset=sbt_offsets[i], transform=transforms[i]) + instances.append(instance) + + build_input = ox.BuildInputInstanceArray(instances) + state.ias = ox.AccelerationStructure(context=state.ctx, build_inputs=build_input) + state.params.trav_handle = state.ias.handle + +def create_module(state): + if DEBUG: + exception_flags=ox.ExceptionFlags.DEBUG | ox.ExceptionFlags.TRACE_DEPTH | ox.ExceptionFlags.STACK_OVERFLOW + else: + exception_flags=ox.ExceptionFlags.NONE + + pipeline_opts = ox.PipelineCompileOptions( + uses_motion_blur=False, + 
traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_LEVEL_INSTANCING, + uses_primitive_type_flags=ox.PrimitiveTypeFlags.CUSTOM, + num_payload_values=3, + num_attribute_values=3, + exception_flags=exception_flags, + pipeline_launch_params_variable_name="params") + + compile_opts = ox.ModuleCompileOptions( + max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT, + opt_level=ox.CompileOptimizationLevel.DEFAULT, + debug_level=ox.CompileDebugLevel.LINEINFO) + + source = os.path.join(script_dir, 'cuda', 'dynamic_materials.cu') + state.module = ox.Module(state.ctx, source, compile_opts, pipeline_opts) + state.pipeline_opts = pipeline_opts + +def create_program_groups(state): + ctx, module = state.ctx, state.module + + state.raygen_grp = ox.ProgramGroup.create_raygen(ctx, module, "__raygen__rg") + state.miss_grp = ox.ProgramGroup.create_miss(ctx, module, "__miss__ms") + + + # The left sphere has a single CH program + # The middle sphere toggles between two CH programs + # The right sphere uses the g_material_index_2.index'th of these CH programs + ch_names = ('__closesthit__ch' , + '__closesthit__ch', '__closesthit__normal', + '__closesthit__blue', '__closesthit__green', '__closesthit__red') + + hit_grps = [] + for ch_name in ch_names: + hit_grp = ox.ProgramGroup.create_hitgroup(ctx, module, + entry_function_CH=ch_name, + entry_function_IS='__intersection__is') + hit_grps.append(hit_grp) + + state.hit_grps = hit_grps + +def create_pipeline(state): + program_grps = [state.raygen_grp, state.miss_grp] + state.hit_grps + + link_opts = ox.PipelineLinkOptions(max_trace_depth=1, + debug_level=ox.CompileDebugLevel.FULL) + + pipeline = ox.Pipeline(state.ctx, + compile_options=state.pipeline_opts, + link_options=link_opts, + program_groups=program_grps, + max_traversable_graph_depth=2) + + pipeline.compute_stack_sizes(1, # max_trace_depth + 0, # max_cc_depth + 0) # max_dc_depth + + state.pipeline = pipeline + +def create_sbt(state): + raygen_grp, miss_grp, 
hit_grps = state.raygen_grp, state.miss_grp, state.hit_grps + + raygen_sbt = ox.SbtRecord(raygen_grp) + + miss_sbt = ox.SbtRecord(miss_grp, names=('color',), formats=('3f4',)) + miss_sbt['color'] = [0.3, 0.1, 0.2] + + hit_groups = [hit_grps[0], hit_grps[1], hit_grps[2], hit_grps[state.material_index_2.index + 3]] + hit_sbts = ox.SbtRecord(hit_groups, names=('color', 'idx'), formats=('3f4', 'u4')) + + # The left sphere cycles through three colors by updating the data field of the SBT record. + hit_sbts['color'][0] = g_colors[0] + hit_sbts['idx'][0] = np.uint32(0) + + # The middle sphere toggles between two SBT records by adjusting the SBT + # offset field of the sphere instance. The IAS needs to be rebuilt for the + # update to take effect. + hit_sbts['color'][1] = g_colors[1] + hit_sbts['idx'][1] = np.uint32(1) + + hit_sbts['color'][2] = g_colors[1] + hit_sbts['idx'][2] = np.uint32(1) + + # The right sphere cycles through colors by modifying the SBT. On update, a + # different prebuilt CH program is packed into the corresponding SBT + # record. + hit_sbts['color'][3] = [0,0,0] + hit_sbts['idx'][3] = np.uint32(2) + + state.raygen_sbt = raygen_sbt + state.miss_sbt = miss_sbt + state.hit_sbts = hit_sbts + + state.sbt = ox.ShaderBindingTable(raygen_record=raygen_sbt, miss_records=miss_sbt, + hitgroup_records=hit_sbts) + + +def update_state(output_buffer, state): + # Change the material properties using one of three different approaches. + if state.has_data_changed: + update_hit_group_data(state) + if state.has_offset_changed: + update_instance_offset(state) + if state.has_sbt_changed: + update_sbt_header(state) + +def update_hit_group_data(state): + # Method 1: + # Change the material parameters for the left sphere by directly modifying + # the HitGroupData for the first SBT record. + + # Cycle through three base colors. + material_index = state.material_index_0.nextval() + + # Update the data field of the SBT record for the left sphere with the new base color. 
+ state.hit_sbts['color'][0] = g_colors[material_index] + state.sbt = ox.ShaderBindingTable(raygen_record=state.raygen_sbt, miss_records=state.miss_sbt, + hitgroup_records=state.hit_sbts) + + state.has_data_changed = False + +def update_instance_offset(state): + # Method 2: + # Update the SBT offset of the middle sphere. The offset is used to select + # an SBT record during traversal, which dertermines the CH & AH programs + # that will be invoked for shading. + + material_index = state.material_index_1.nextval() + sbt_offsets[1] = 1 + material_index + + # It's necessary to rebuild the IAS for the updated offset to take effect. + build_ias(state) + + state.has_offset_changed = False + +def update_sbt_header(state): + # Method 3: + # Select a new material by re-packing the SBT header for the right sphere + # with a different CH program. + + # The right sphere will use the next compiled program group. + material_index = state.material_index_2.nextval() + + state.hit_sbts.update_program_group(3, state.hit_grps[3 + material_index]) + + state.sbt = ox.ShaderBindingTable(raygen_record=state.raygen_sbt, miss_records=state.miss_sbt, + hitgroup_records=state.hit_sbts) + + state.has_sbt_changed = False + +def launch(state, output_buffer): + state.params.image = output_buffer.map() + + state.pipeline.launch(state.sbt, dimensions=state.launch_dimensions, + params=state.params.handle, stream=output_buffer.stream) + + output_buffer.unmap() + +def display(output_buffer, gl_display, window): + (framebuf_res_x, framebuf_res_y) = glfw.get_framebuffer_size(window) + gl_display.display( output_buffer.width, output_buffer.height, + framebuf_res_x, framebuf_res_y, + output_buffer.get_pbo() ) + + +def display_usage(): + usage = """Use the arrow keys to modify the materials + [LEFT] left sphere + [UP] middle sphere + [RIGHT] right sphere""" + + imgui.new_frame() + display_text(usage, 20.0, 20.0) + imgui.end_frame() + +if __name__ == '__main__': + state = SampleState(1024, 768) + + 
buffer_format = BufferImageFormat.UCHAR4 + output_buffer_type = CudaOutputBufferType.CUDA_DEVICE + + init_camera(state) + create_context(state) + build_gas(state) + build_ias(state) + create_module(state) + create_program_groups(state) + create_pipeline(state) + create_sbt(state) + + window, impl = init_ui("optixDynamicMaterials", state.params.image_width, state.params.image_height) + + glfw.set_key_callback(window, key_callback) + glfw.set_window_user_pointer(window, state) + + output_buffer = CudaOutputBuffer(output_buffer_type, buffer_format, + state.params.image_width, state.params.image_height) + + gl_display = GLDisplay(buffer_format) + + while not glfw.window_should_close(window): + glfw.poll_events() + + update_state(output_buffer, state) + launch(state, output_buffer) + display(output_buffer, gl_display, window) + display_usage() + + imgui.render() + impl.render(imgui.get_draw_data()) + glfw.swap_buffers(window) + + impl.shutdown() + glfw.terminate() + diff --git a/examples/hello.py b/examples/hello.py index 1973b0e..0370d2b 100644 --- a/examples/hello.py +++ b/examples/hello.py @@ -1,15 +1,20 @@ +import os, sys, logging + import optix as ox import cupy as cp import numpy as np + from PIL import Image, ImageOps -import logging -import sys + +script_dir = os.path.dirname(os.path.abspath(__file__)) + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger() def create_module(ctx, pipeline_opts): compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO) - module = ox.Module(ctx, 'cuda/hello.cu', compile_opts, pipeline_opts) + source = os.path.join(script_dir, 'cuda', 'hello.cu') + module = ox.Module(ctx, source, compile_opts, pipeline_opts) return module diff --git a/examples/spheres.py b/examples/spheres.py index 0adf57a..20acf42 100644 --- a/examples/spheres.py +++ b/examples/spheres.py @@ -1,14 +1,17 @@ -import optix as ox +import os, sys, logging + import cupy as cp import numpy as np +import optix as ox 
+ from PIL import Image, ImageOps -import logging -import sys + +script_dir = os.path.dirname(os.path.abspath(__file__)) + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger() img_size = (1024, 768) - def compute_spheres_bbox(centers, radii): out = cp.empty((centers.shape[0], 6), dtype='f4') out[:, :3] = centers - radii.reshape(-1, 1) @@ -24,7 +27,8 @@ def create_acceleration_structure(ctx, bboxes): def create_module(ctx, pipeline_opts): compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO) - module = ox.Module(ctx, 'cuda/spheres.cu', compile_opts, pipeline_opts) + source = os.path.join(script_dir, 'cuda', 'spheres.cu') + module = ox.Module(ctx, source, compile_opts, pipeline_opts) return module @@ -43,7 +47,7 @@ def create_pipeline(ctx, program_grps, pipeline_options): pipeline = ox.Pipeline(ctx, compile_options=pipeline_options, link_options=link_opts, program_groups=program_grps) pipeline.compute_stack_sizes(1, # max_trace_depth 0, # max_cc_depth - 1) # max_dc_depth + 0) # max_dc_depth return pipeline diff --git a/examples/triangle.py b/examples/triangle.py index e3517d3..d3d9159 100644 --- a/examples/triangle.py +++ b/examples/triangle.py @@ -1,8 +1,13 @@ -import optix as ox +import os + import cupy as cp import numpy as np +import optix as ox + from PIL import Image, ImageOps +script_dir = os.path.dirname(os.path.abspath(__file__)) + img_size = (1024, 768) # use a regular function for logging @@ -19,7 +24,8 @@ def create_acceleration_structure(ctx, vertices): def create_module(ctx, pipeline_opts): compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.LINEINFO) - module = ox.Module(ctx, 'cuda/triangle.cu', compile_opts, pipeline_opts) + source = os.path.join(script_dir, 'cuda', 'triangle.cu') + module = ox.Module(ctx, source, compile_opts, pipeline_opts) return module @@ -118,5 +124,3 @@ def launch_pipeline(pipeline : ox.Pipeline, sbt, gas): img = img.reshape(img_size[1], 
img_size[0], 4) img = ImageOps.flip(Image.fromarray(img, 'RGBA')) img.show() - - diff --git a/optix/build.pxd b/optix/build.pxd index ac24bb3..854612a 100644 --- a/optix/build.pxd +++ b/optix/build.pxd @@ -183,7 +183,6 @@ cdef extern from "optix.h" nogil: unsigned int flags OptixTraversableHandle traversableHandle - OptixResult optixAccelComputeMemoryUsage(OptixDeviceContext context, const OptixAccelBuildOptions * accelOptions, const OptixBuildInput * buildInputs, @@ -282,7 +281,7 @@ cdef class Instance(OptixObject): cdef class BuildInputInstanceArray(BuildInputArray): cdef OptixBuildInputInstanceArray build_input - cdef object instances + cdef public object instances cdef object _d_instances @@ -295,4 +294,4 @@ cdef class AccelerationStructure(OptixContextObject): cdef OptixTraversableHandle _handle cdef void _init_build_inputs(self, build_inputs, vector[OptixBuildInput]& ret) cdef void _init_accel_options(self, size_t num_build_inputs, unsigned int build_flags, OptixBuildOperation operation, vector[OptixAccelBuildOptions]& ret) - cdef void build(self, build_inputs, stream=*) \ No newline at end of file + cdef void build(self, build_inputs, stream=*) diff --git a/optix/build.pyx b/optix/build.pyx index c594ae9..fb42afc 100644 --- a/optix/build.pyx +++ b/optix/build.pyx @@ -250,7 +250,9 @@ cdef class BuildInputCustomPrimitiveArray(BuildInputArray): self.build_input.aabbBuffers = self._d_aabb_buffer_ptrs.const_data() self.build_input.numPrimitives = shape[0] - self.build_input.strideInBytes = self._d_aabb_buffers[0].strides[0] + + # https://github.com/cupy/cupy/issues/5897 + self.build_input.strideInBytes = 6*np.float32().itemsize self._flags.resize(num_sbt_records) if flags is None: @@ -429,18 +431,24 @@ cdef class Instance(OptixObject): self.instance.instanceId = instance_id self.instance.flags = flags.value self.instance.sbtOffset = sbt_offset - visibility_mask = int(visibility_mask) if visibility_mask is not None else (2**(sizeof(unsigned int) * 8) - 1) + + 
max_visibility_mask_bits = self.traversable.context.num_bits_instances_visibility_mask + visibility_mask = int(visibility_mask) if visibility_mask is not None else (2**max_visibility_mask_bits - 1) if visibility_mask.bit_length() > self.traversable.context.num_bits_instances_visibility_mask: - raise ValueError(f"Too many entries in visibility mask. Got {visibility_mask.bit_length()} but supported are only {self.traversable.context.num_bits_instances_visibility_mask}") + raise ValueError(f"Too many entries in visibility mask. Got {visibility_mask.bit_length()} but supported are only {max_visibility_mask_bits}") self.instance.visibilityMask = visibility_mask + def update_traversable(self, AccelerationStructure traversable): + self.traversable = traversable + self.instance.traversableHandle = self.traversable.handle + def __deepcopy__(self, memodict={}): from copy import deepcopy cls = self.__class__ result = cls.__new__(cls) memodict[id(self)] = result - result._instance = self.instance - result._traversable = deepcopy(self.traversable) + result.instance = self.instance + result.traversable = deepcopy(self.traversable) return result @@ -477,7 +485,15 @@ cdef class BuildInputInstanceArray(BuildInputArray): cdef size_t num_elements(self): return self.build_input.numInstances + + def update_instance(self, index): + src_ptr = &(((self.instances[index])).instance) + dst_ptr = self._d_instances.ptr + index*sizeof(OptixInstance) + cp.cuda.runtime.memcpy(dst_ptr, src_ptr, sizeof(OptixInstance), cp.cuda.runtime.memcpyHostToDevice) + def get_transform_view(self, index): + device_ptr = cp.cuda.MemoryPointer(mem=self._d_instances.mem, offset=index*sizeof(OptixInstance)) + return cp.ndarray(shape=(3,4), dtype=np.float32, memptr=device_ptr) cdef class AccelerationStructure(OptixContextObject): @@ -738,7 +754,7 @@ cdef class AccelerationStructure(OptixContextObject): result._build_flags = self._build_flags result._buffer_sizes = self._buffer_sizes result._instances = 
deepcopy(self._instances) # copy all instances and their AccelerationStructures first - + buffer_size = round_up(self._buffer_sizes.outputSizeInBytes, 8) + 8 result._gas_buffer = cp.cuda.alloc(buffer_size) cp.cuda.runtime.memcpy(result._gas_buffer.ptr, self._gas_buffer.ptr, buffer_size, cp.cuda.runtime.memcpyDeviceToDevice) diff --git a/optix/module.pxd b/optix/module.pxd index 140c030..45509d2 100644 --- a/optix/module.pxd +++ b/optix/module.pxd @@ -72,4 +72,5 @@ cdef class Module(OptixContextObject): cdef OptixModule module cdef list _compile_flags - #cpdef size_t c_obj(self) \ No newline at end of file + #cpdef size_t c_obj(self) + diff --git a/optix/module.pyx b/optix/module.pyx index 93fe88e..7dd3326 100644 --- a/optix/module.pyx +++ b/optix/module.pyx @@ -63,6 +63,12 @@ cdef class ModuleCompileOptions(OptixObject): cdef tuple _nvrtc_compile_flags_default = ('-use_fast_math', '-lineinfo', '-default-device', '-std=c++11', '-rdc', 'true') +def get_default_nvrtc_compile_flags(std=None, rdc=False): + flags = list(_nvrtc_compile_flags_default[:-3]) + flags.append('-std=c++11' if std is None else f'-std=c++{std}') + if rdc: + flags.extend(['-rdc', 'true']) + return tuple(flags) cdef _is_ptx(src): if not isinstance(src, (bytes, bytearray)): diff --git a/optix/pipeline.pxd b/optix/pipeline.pxd index 0e70650..ceaac1d 100644 --- a/optix/pipeline.pxd +++ b/optix/pipeline.pxd @@ -171,4 +171,4 @@ cdef class Pipeline(OptixContextObject): ProgramGroup program_group_miss_1, object program_groups_closesthit_1, ProgramGroup program_group_miss_2, - object program_groups_closesthit_2) \ No newline at end of file + object program_groups_closesthit_2) diff --git a/optix/pipeline.pyx b/optix/pipeline.pyx index d3d41ae..b3a3142 100644 --- a/optix/pipeline.pyx +++ b/optix/pipeline.pyx @@ -76,7 +76,7 @@ cdef class PipelineCompileOptions(OptixObject): self.compile_options.numAttributeValues = num_attribute_values self.compile_options.exceptionFlags = exception_flags.value 
self.pipeline_launch_params_variable_name = pipeline_launch_params_variable_name - self.compile_options.usesPrimitiveTypeFlags = uses_primitive_type_flags.value + self.compile_options.usesPrimitiveTypeFlags = (uses_primitive_type_flags.value) @property def uses_motion_blur(self): @@ -420,4 +420,4 @@ cdef class Pipeline(OptixContextObject): cdef const OptixShaderBindingTable* c_sbt = &sbt.sbt with nogil: - optix_check_return(optixLaunch(self.pipeline, c_stream, d_params_ptr, c_itemsize, c_sbt, c_dims[0], c_dims[1], c_dims[2])) \ No newline at end of file + optix_check_return(optixLaunch(self.pipeline, c_stream, d_params_ptr, c_itemsize, c_sbt, c_dims[0], c_dims[1], c_dims[2])) diff --git a/optix/program_group.pyx b/optix/program_group.pyx index cfc01a9..2d95c24 100644 --- a/optix/program_group.pyx +++ b/optix/program_group.pyx @@ -178,7 +178,7 @@ cdef class ProgramGroup(OptixContextObject): context: DeviceContext The context to use for this ProgramGroup. module: Module - The module containig the raygen function. + The module containing the raygen function. entry_function_name: str The name of the raygen function in the module. @@ -199,7 +199,7 @@ cdef class ProgramGroup(OptixContextObject): context: DeviceContext The context to use for this ProgramGroup. module: Module - The module containig the miss function. + The module containing the miss function. entry_function_name: str The name of the miss function in the module. @@ -220,7 +220,7 @@ cdef class ProgramGroup(OptixContextObject): context: DeviceContext The context to use for this ProgramGroup. module: Module - The module containig the exception function. + The module containing the exception function. entry_function_name: str The name of the exception function in the module. 
@@ -361,4 +361,4 @@ cdef class ProgramGroup(OptixContextObject): @property def kind(self): - return self._kind \ No newline at end of file + return self._kind diff --git a/optix/struct.pxd b/optix/struct.pxd index df12b10..e68a783 100644 --- a/optix/struct.pxd +++ b/optix/struct.pxd @@ -18,9 +18,9 @@ cdef class _StructHelper(OptixObject): cdef class SbtRecord(_StructHelper): - cdef ProgramGroup program_group + cdef list program_groups cdef str header_format cdef class LaunchParamsRecord(_StructHelper): - pass \ No newline at end of file + pass diff --git a/optix/struct.pyx b/optix/struct.pyx index 9f35e5f..7f1e53f 100644 --- a/optix/struct.pyx +++ b/optix/struct.pyx @@ -22,7 +22,6 @@ def _aligned_itemsize( formats, alignment ): } ) return round_up( temp_dtype.itemsize, alignment ) - def array_to_device_memory(numpy_array, stream=None): """ Transfer a numpy array to cuda device memory. This does not generate a full cupy.ndarray, but an @@ -262,15 +261,22 @@ cdef class SbtRecord(_StructHelper): All options are the same as in the base class. . The alignment parameter is ignored though and only present for the interface. 
""" - def __init__(self, ProgramGroup program_group, names=(), formats=(), values=None, size=1, alignment=1): + def __init__(self, program_groups, names=(), formats=(), values=None): + program_groups = list(ensure_iterable(program_groups)) names = ensure_iterable(names) formats = ensure_iterable(formats) + + if not all(isinstance(p, ProgramGroup) for p in program_groups): + raise TypeError("Only program groups") + + cdef unsigned int num_program_groups = len(program_groups) + + self.program_groups = program_groups - self.program_group = program_group header_format = '{}B'.format(OPTIX_SBT_RECORD_HEADER_SIZE) names = ('header',) + names formats = (header_format,) + formats - super().__init__(names, formats, values=values, size=size, alignment=OPTIX_SBT_RECORD_ALIGNMENT) + super().__init__(names, formats, values=values, size=num_program_groups, alignment=OPTIX_SBT_RECORD_ALIGNMENT) @cython.boundscheck(False) @cython.wraparound(False) @@ -280,9 +286,18 @@ cdef class SbtRecord(_StructHelper): cdef size_t size = array.shape[0] cdef unsigned char[:, ::1] buffer = array.view('B').reshape(-1, itemsize) for i in range(size): - optixSbtRecordPackHeader(self.program_group.program_group, (&buffer[i, 0])) + optixSbtRecordPackHeader((self.program_groups[i]).program_group, (&buffer[i, 0])) return array + def update_program_group(self, i, program_group): + if not isinstance(program_group, ProgramGroup): + raise TypeError("Expected a program group as second argument.") + self.program_groups[i] = program_group + + cdef size_t itemsize = self._array.dtype.itemsize + cdef unsigned char[:, ::1] buffer = self._array.view('B').reshape(-1, itemsize) + optixSbtRecordPackHeader((self.program_groups[i]).program_group, (&buffer[i, 0])) + cdef class LaunchParamsRecord(_StructHelper): """ diff --git a/optix/sutil/__init__.py b/optix/sutil/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/optix/sutil/camera.py b/optix/sutil/camera.py new file mode 100644 index 0000000..1697323 
--- /dev/null +++ b/optix/sutil/camera.py @@ -0,0 +1,46 @@ +import numpy as np + +from optix.sutil.vecmath import length, normalize, cross +from optix.sutil.properties import get_member, set_float, set_float3 + +class Camera: + """Implements a perspective camera.""" + + __slots__ = ['_eye', '_look_at', '_up', '_fov_y', '_aspect_ratio'] + + def __init__(self, eye=None, look_at=None, up=None, fov_y=None, aspect_ratio=None): + self.eye = eye + self.look_at = look_at + self.up = up + self.fov_y = fov_y + self.aspect_ratio = aspect_ratio + + eye = property(get_member("_eye"), set_float3("_eye", 1.0)) + look_at = property(get_member("_look_at"), set_float3("_look_at", 0.0)) + up = property(get_member("_up"), set_float3("_up", [0.0,1.0,0.0])) + + fov_y = property(get_member("_fov_y"), set_float("_fov_y", 35.0)) + aspect_ratio = property(get_member("_aspect_ratio"), set_float("_aspect_ratio", 1.0)) + + def _get_direction(self): + return normalize(self.look_at - self.eye) + def _set_direction(self, value): + self.look_at = self.eye + length(self.look_at - self.eye)*value; + direction = property(_get_direction, _set_direction) + + def uvw_frame(self): + # do not normalize W -- it implies focal length + W = self.look_at - self.eye + wlen = length(W) + assert wlen > 0, (self.eye, self.look_at) + + U = normalize(cross(W, self.up)) + V = normalize(cross(U, W)) + + vlen = wlen * np.tan(0.5 * np.deg2rad(self.fov_y)) + V *= vlen + + ulen = vlen * self.aspect_ratio + U *= ulen + + return (U,V,W) diff --git a/optix/sutil/cuda_output_buffer.py b/optix/sutil/cuda_output_buffer.py new file mode 100644 index 0000000..fe5e215 --- /dev/null +++ b/optix/sutil/cuda_output_buffer.py @@ -0,0 +1,216 @@ +import enum, re + +import numpy as np +import cupy as cp + +import OpenGL.GL as gl + +from optix.sutil.vecmath import vtype_to_dtype + +class BufferImageFormat(enum.Enum): + UCHAR4=0 + FLOAT3=1 + FLOAT4=2 + + @property + def dtype(self): + if self is BufferImageFormat.UCHAR4: + return 
vtype_to_dtype('uchar4') + elif self is BufferImageFormat.FLOAT3: + return vtype_to_dtype('float3') + elif self is BufferImageFormat.FLOAT4: + return vtype_to_dtype('float4') + else: + raise NotImplementedError(self) + + @property + def itemsize(self): + return self.dtype.itemsize + +class CudaOutputBufferType(enum.Enum): + CUDA_DEVICE = 0, # not preferred, typically slower than ZERO_COPY + GL_INTEROP = 1, # single device only, preferred for single device + ZERO_COPY = 2, # general case, preferred for multi-gpu if not fully nvlink connected + CUDA_P2P = 3, # fully connected only, preferred for fully nvlink connected + + +class CudaOutputBuffer: + __slots__ = ['_pixel_format', '_buffer_type', '_width', '_height', + '_device', '_device_idx', '_device', '_stream', + '_host_buffer', '_device_buffer', '_pbo'] + + def __init__(self, buffer_type, pixel_format, width, height, device_idx=0): + for attr in self.__slots__: + setattr(self, attr, None) + + self.device_idx = device_idx + self.pixel_format = pixel_format + self.buffer_type = buffer_type + self.resize(width, height) + self.stream = None + + self._reallocate_buffers() + + + def resize(self, width, height): + self.width = width + self.height = height + + def get_host_buffer(self): + if buffer_type is CudaOutputBufferType.CUDA_DEVICE: + self.copy_device_to_host() + return self._host_buffer + else: + msg = f'Buffer type {buffer_type} has not been implemented yet.' + raise NotImplementedError(msg) + + def map(self): + self._make_current() + if (self._host_buffer is None) or (self._device_buffer is None): + self._reallocate_buffers() + return self._device_buffer.data.ptr + + def unmap(self): + self._make_current() + buffer_type = self.buffer_type + if buffer_type is CudaOutputBufferType.CUDA_DEVICE: + self._stream.synchronize() + else: + msg = f'Buffer type {buffer_type} has not been implemented yet.' 
def get_pbo(self):
    """Return the OpenGL buffer name holding the latest image.

    The buffer object is generated on first use.  For ``CUDA_DEVICE``
    buffers the image is copied device -> host -> PBO before returning.

    Raises:
        NotImplementedError: for every other buffer type (the PBO has
            already been generated by then).
    """
    buffer_type = self.buffer_type

    self._make_current()

    # Lazily create the GL buffer the first time it is requested.
    if self._pbo is None:
        self._pbo = gl.glGenBuffers(1)

    if buffer_type is not CudaOutputBufferType.CUDA_DEVICE:
        msg = f'Buffer type {buffer_type} has not been implemented yet.'
        raise NotImplementedError(msg)

    self.copy_device_to_host()
    self.copy_host_to_pbo()
    return self._pbo
def _get_device_idx(self):
    """Return the integer CUDA device index stored by the ``device_idx`` setter.

    The setter keeps the index in ``_device_idx`` and the matching
    ``cp.cuda.Device`` object in ``_device``; the original getter returned
    the latter by mistake, so reading ``device_idx`` did not give back the
    value that was assigned.
    """
    return self._device_idx
class GLDisplay:
    """Draws the image held in an OpenGL pixel buffer object as a full-viewport quad.

    The constructor issues OpenGL calls (vertex array, shader program,
    texture and vertex buffer creation).
    NOTE(review): this presumably requires a current GL context created by
    the caller -- confirm against the windowing code.
    """

    vert_source = \
"""
#version 330 core

layout(location = 0) in vec3 position;
out vec2 UV;

void main()
{
    gl_Position = vec4(position, 1);
    UV = (vec2(position.x, position.y) + vec2(1,1)) / 2.0;
}
"""

    frag_source = \
"""
#version 330 core

in vec2 UV;
layout(location=0) out vec4 color;

uniform sampler2D render_tex;

void main()
{
    color = texture(render_tex, UV).xyzw;
}
"""

    # Two triangles covering clip space [-1, 1] x [-1, 1].
    quad_vertex_buffer_data = np.asarray([
        [-1.0, -1.0, 0.0],
        [ 1.0, -1.0, 0.0],
        [-1.0,  1.0, 0.0],
        [-1.0,  1.0, 0.0],
        [ 1.0, -1.0, 0.0],
        [ 1.0,  1.0, 0.0],
    ], dtype=np.float32)

    # '_image_format' was listed twice in the original; one entry suffices.
    __slots__ = ['_image_format', '_render_tex', '_program', '_render_tex_uniforloc',
                 '_quad_vertex_buffer']

    def __init__(self, image_format):
        """Create the GL objects needed to display images of ``image_format``.

        Args:
            image_format: a ``BufferImageFormat`` member describing the
                pixel layout of the buffers later passed to ``display``.

        Raises:
            AssertionError: if ``image_format`` is not a ``BufferImageFormat``.
        """
        # The original message referenced an undefined name (``fmt``), which
        # turned a failed check into a NameError instead of AssertionError.
        assert isinstance(image_format, BufferImageFormat), type(image_format)

        vertex_array = gl.glGenVertexArrays(1)
        gl.glBindVertexArray(vertex_array)

        program = self.create_gl_program()
        render_tex_uniforloc = gl.glGetUniformLocation(program, "render_tex")

        render_tex = gl.glGenTextures(1)
        gl.glBindTexture(gl.GL_TEXTURE_2D, render_tex)

        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_NEAREST)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
        gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)

        quad_vertex_buffer = gl.glGenBuffers(1)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, quad_vertex_buffer)
        gl.glBufferData(gl.GL_ARRAY_BUFFER,
                        self.quad_vertex_buffer_data,
                        gl.GL_STATIC_DRAW)

        self._image_format = image_format
        self._program = program
        self._render_tex = render_tex
        self._render_tex_uniforloc = render_tex_uniforloc
        self._quad_vertex_buffer = quad_vertex_buffer

    @classmethod
    def create_gl_program(cls):
        """Compile ``vert_source`` and ``frag_source`` and return the linked program."""
        return gl.shaders.compileProgram(
            gl.shaders.compileShader(cls.vert_source, gl.GL_VERTEX_SHADER),
            gl.shaders.compileShader(cls.frag_source, gl.GL_FRAGMENT_SHADER),
        )

    def display(self, screen_res_x, screen_res_y, framebuf_res_x, framebuf_res_y, pbo):
        """Draw the image stored in ``pbo`` into the default framebuffer.

        Args:
            screen_res_x, screen_res_y: size used for the texture image.
            framebuf_res_x, framebuf_res_y: size used for the viewport.
            pbo: GL buffer name bound as ``GL_PIXEL_UNPACK_BUFFER`` while
                the texture image is specified.

        Raises:
            NotImplementedError: if the stored image format is unknown.
        """
        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
        gl.glViewport(0, 0, framebuf_res_x, framebuf_res_y)
        gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
        gl.glUseProgram(self._program)
        gl.glPolygonMode(gl.GL_FRONT_AND_BACK, gl.GL_FILL)

        gl.glActiveTexture(gl.GL_TEXTURE0)
        gl.glBindTexture(gl.GL_TEXTURE_2D, self._render_tex)
        gl.glBindBuffer(gl.GL_PIXEL_UNPACK_BUFFER, pbo)

        # Use the largest unpack alignment that divides the pixel size.
        itemsize = self._image_format.itemsize
        for alignment in (8, 4, 2, 1):
            if itemsize % alignment == 0:
                gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, alignment)
                break

        # Float formats are drawn with GL_FRAMEBUFFER_SRGB enabled; 8-bit
        # data is displayed as is.
        convert_to_srgb = True

        # The data argument is None while a PIXEL_UNPACK buffer is bound.
        # NOTE(review): presumably GL then reads the pixels from the start
        # of ``pbo`` -- confirm against the glTexImage2D reference.
        image_format = self._image_format
        if image_format is BufferImageFormat.UCHAR4:
            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA8, screen_res_x, screen_res_y,
                            0, gl.GL_RGBA, gl.GL_UNSIGNED_BYTE, None)
            convert_to_srgb = False
        elif image_format is BufferImageFormat.FLOAT3:
            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGB32F, screen_res_x, screen_res_y,
                            0, gl.GL_RGB, gl.GL_FLOAT, None)
        elif image_format is BufferImageFormat.FLOAT4:
            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, screen_res_x, screen_res_y,
                            0, gl.GL_RGBA, gl.GL_FLOAT, None)
        else:
            raise NotImplementedError(f"Unknown image format {image_format}.")

        if convert_to_srgb:
            gl.glEnable(gl.GL_FRAMEBUFFER_SRGB)
        else:
            gl.glDisable(gl.GL_FRAMEBUFFER_SRGB)

        gl.glBindBuffer(gl.GL_PIXEL_UNPACK_BUFFER, 0)
        gl.glUniform1i(self._render_tex_uniforloc, 0)

        # 1st attribute buffer : vertices
        gl.glEnableVertexAttribArray(0)
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._quad_vertex_buffer)
        gl.glVertexAttribPointer(0, 3, gl.GL_FLOAT, gl.GL_FALSE, 0, ctypes.c_void_p(0))
        gl.glDrawArrays(gl.GL_TRIANGLES, 0, 6)
        gl.glDisableVertexAttribArray(0)

        gl.glDisable(gl.GL_FRAMEBUFFER_SRGB)
def init_ui(window_title, width, height):
    """Create a GLFW window, make its context current and set up imgui.

    Args:
        window_title: title passed to ``glfw.create_window``.
        width, height: window size; converted with ``int()``.

    Returns:
        ``(window, impl)`` -- the GLFW window and the imgui ``GlfwRenderer``.

    Raises:
        RuntimeError: if GLFW cannot be initialised or the window cannot be
            created.
    """
    if not glfw.init():
        raise RuntimeError("Could not initialize OpenGL context")

    window = glfw.create_window(int(width), int(height), window_title, None, None)

    # Check the window *before* using it: the original made the (possibly
    # null) window current first.  GLFW was initialised above, so release it
    # again when window creation fails.
    if not window:
        glfw.terminate()
        raise RuntimeError("Could not initialize Window")

    glfw.make_context_current(window)
    glfw.swap_interval(0)

    init_gl()
    impl = init_imgui(window)

    return window, impl
def set_float3(varname, default_value=None):
    """Build a property setter that stores a float32 3-vector in ``varname``.

    The returned setter replaces a ``None`` value by ``default_value``.  A
    scalar is expanded to three equal float32 components, anything else is
    converted with ``np.asarray(..., dtype=np.float32)``.  If both the value
    and the default are ``None``, ``None`` is stored.
    """

    def setter(self, value, varname=varname, default_value=default_value):
        chosen = default_value if value is None else value

        if chosen is not None:
            if np.isscalar(chosen):
                # broadcast a single number to all three components
                chosen = np.full(shape=(3,), dtype=np.float32, fill_value=chosen)
            else:
                chosen = np.asarray(chosen, dtype=np.float32)

        setattr(self, varname, chosen)

    return setter
def __init__(self):
    """Initialise every attribute to its property default.

    Each slot ``_name`` is reset by assigning ``None`` to the property
    ``name``, which makes the setter store its default value.

    ``camera`` is assigned last.  Its setter calls
    ``reinitialize_orientation_from_camera()``, which derives ``u``, ``v``,
    ``w``, ``latitude``, ``longitude`` and ``camera_eye_lookat_distance``
    from the camera.  In plain slot order those derived values were
    overwritten by the zero defaults of the slots that follow ``_camera``,
    leaving a freshly built trackball with a zero orbit distance and a zero
    reference frame.
    """
    deferred = 'camera'
    for slot in self.__slots__:
        name = slot[1:]  # strip the leading underscore -> property name
        if name != deferred:
            setattr(self, name, None)

    # Must come after all plain defaults so the derived state survives.
    self.camera = None
def update_tracking(self, x, y, canvas_width, canvas_height):
    """Rotate the camera according to a pointer move to ``(x, y)``.

    If tracking has not started yet, this only records the position via
    ``start_tracking``.  Otherwise the pointer delta changes latitude and
    longitude by half a degree per unit, latitude being limited to
    [-89, +89] degrees, and the camera is updated.  Unless ``gimbal_lock``
    is set, the reference frame is then re-derived from the camera and the
    camera's up vector is set to the new ``w``.

    ``canvas_width`` and ``canvas_height`` are accepted but not used.
    """
    if not self._perform_tracking:
        # first sample of a drag: just remember where it started
        return self.start_tracking(x, y)

    moved_x = x - self.previous_position_x
    moved_y = y - self.previous_position_y

    if moved_x == 0 and moved_y == 0:
        return

    self.previous_position_x = x
    self.previous_position_y = y

    # Angles are stored in radians but stepped in degrees.
    latitude_deg = np.rad2deg(self.latitude) + 0.5*moved_y
    latitude_deg = min(+89.0, max(-89.0, latitude_deg))
    self.latitude = np.deg2rad(latitude_deg)

    longitude_deg = np.rad2deg(self.longitude) - 0.5*moved_x
    self.longitude = np.deg2rad(np.fmod(longitude_deg, 360.0))

    self._update_camera()

    if self.gimbal_lock:
        return

    self.reinitialize_orientation_from_camera()
    self.camera.up = self.w
def set_reference_frame(self, u, v, w):
    """
    Specify the frame of the orbit that the camera is orbiting around.

    The important part is the 'up' of that frame, as it defines the
    singularity of the orbit.  Here, 'up' is the ``w`` component.
    Typically you want the up of the reference frame to align with the up
    of the camera.  To move around completely freely you can instead keep
    updating the reference frame by calling
    ``reinitialize_orientation_from_camera()``; in most cases that is not
    required (set the frame/up once and leave it as is).

    Latitude and longitude are recomputed from the current camera
    direction expressed in the new frame.

    Raises:
        AssertionError: if the camera's eye and look_at coincide.
    """
    self.u = u
    self.v = v
    self.w = w

    view = self.camera.look_at - self.camera.eye
    assert length(view) != 0
    # direction from look_at towards the eye, in world space
    dir_ws = -normalize(view)

    dirx = dot(dir_ws, u)
    diry = dot(dir_ws, v)
    dirz = dot(dir_ws, w)

    self.longitude = np.arctan2(dirx, diry)
    # Floating-point rounding can push the dot product marginally outside
    # [-1, 1] when the view direction is (anti)parallel to w; arcsin would
    # then return NaN and poison the latitude, so clamp first.
    self.latitude = np.arcsin(np.clip(dirz, -1.0, 1.0))
def ctype_to_dtype(ctype):
    """Map a C scalar type name to the matching numpy scalar type.

    ``'long int'`` is shortened to ``'long'``, ``'long long'`` to
    ``'longlong'`` and a leading ``'unsigned '`` to the ``u`` prefix before
    the lookup, so ``'unsigned long long'`` resolves like ``'ulonglong'``.

    Raises:
        ValueError: if the (normalised) name is not in the table.
    """
    _ctype_to_dtype = {
        'float': np.float32,
        'double': np.float64,
        'char': np.int8,
        'short': np.int16,
        'int': np.int32,
        'longlong': np.int64,
        'uchar': np.uint8,
        'ushort': np.uint16,
        'uint': np.uint32,
        'ulonglong': np.uint64,
    }
    normalized = ctype.replace('long int', 'long')
    normalized = normalized.replace('long long', 'longlong')
    normalized = normalized.replace('unsigned ', 'u')

    if normalized not in _ctype_to_dtype:
        # The original message lacked the f-prefix and printed the
        # placeholder text literally.
        msg = f"Cannot determine dtype from ctype '{ctype}'."
        raise ValueError(msg)

    return _ctype_to_dtype[normalized]


def vtype_to_dtype(vtype):
    """Map a CUDA-style vector type name (``'float3'``, ``'uchar4'``) to a numpy dtype.

    Returns:
        * the plain scalar type when no component count (or ``0``) is given,
        * a structured dtype with fields ``x``, ``y``, ``z``, ``w`` (as many
          as requested) for counts 1 to 4,
        * a sub-array dtype of shape ``(count,)`` for larger counts.

    Raises:
        ValueError: if ``vtype`` does not start with a known scalar type name.
    """
    regexp = re.compile(r'((?:float|double)|u?(?:char|short|int|longlong))(\d*)')

    match = regexp.match(vtype)
    if not match:
        # The original message was not an f-string and named an undefined
        # variable (``pformat``).
        msg = f"Cannot extract format from '{vtype}'."
        raise ValueError(msg)

    dtype = ctype_to_dtype(match.group(1))

    # ``(\d*)`` always participates in the match, so a missing count is the
    # empty string, never None; the original ``is None`` test let '' through
    # to ``int('')``, which raised ValueError for plain scalar names.
    count = match.group(2)
    if not count:
        return dtype

    count = int(count)
    if count == 0:
        return dtype

    if count <= 4:
        names = tuple('xyzw'[:count])
        formats = [dtype,]*count
        return np.dtype(dict(names=names, formats=formats))

    return np.dtype((dtype, (count,)))