diff --git a/examples/cuda/opacity_micromap.cu b/examples/cuda/opacity_micromap.cu new file mode 100644 index 0000000..d523bfe --- /dev/null +++ b/examples/cuda/opacity_micromap.cu @@ -0,0 +1,143 @@ +// +// Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include + +#include "opacity_micromap.h" +#include "helpers.h" + +#include "vec_math.h" + +extern "C" { +__constant__ Params params; +} + + +static __forceinline__ __device__ void setPayloadColor( float3 p ) +{ + optixSetPayload_0( __float_as_uint( p.x ) ); + optixSetPayload_1( __float_as_uint( p.y ) ); + optixSetPayload_2( __float_as_uint( p.z ) ); +} + +static __forceinline__ __device__ void setPayloadAnyhit( unsigned int a ) +{ + optixSetPayload_3( a ); +} + + +static __forceinline__ __device__ void computeRay( uint3 idx, uint3 dim, float3& origin, float3& direction ) +{ + const float3 U = params.cam_u; + const float3 V = params.cam_v; + const float3 W = params.cam_w; + const float2 d = 2.0f * make_float2( + static_cast( idx.x ) / static_cast( dim.x ), + static_cast( idx.y ) / static_cast( dim.y ) + ) - 1.0f; + + origin = params.cam_eye; + direction = normalize( d.x * U + d.y * V + W ); +} + + +extern "C" __global__ void __raygen__rg() +{ + // Lookup our location within the launch grid + const uint3 idx = optixGetLaunchIndex(); + const uint3 dim = optixGetLaunchDimensions(); + + // Map our launch idx to a screen location and create a ray from the camera + // location through the screen + float3 ray_origin, ray_direction; + computeRay( idx, dim, ray_origin, ray_direction ); + + // Trace the ray against our scene hierarchy + unsigned int p0, p1, p2, p3=0; + optixTrace( + params.handle, + ray_origin, + ray_direction, + 0.0f, // Min intersection distance + 1e16f, // Max intersection distance + 0.0f, // rayTime -- used for motion blur + OptixVisibilityMask( 255 ), // Specify always visible + OPTIX_RAY_FLAG_NONE, + 0, // SBT offset -- See SBT discussion + 1, // SBT stride -- See SBT discussion + 0, // missSBTIndex -- See SBT discussion + p0, p1, p2, p3 ); + float3 result; + result.x = __uint_as_float( p0 ); + result.y = __uint_as_float( p1 ); + result.z = __uint_as_float( p2 ); + unsigned int anyhit_executed = p3; + + // If anyhit was executed, tint the pixel towards white + if( anyhit_executed ) + result = lerp( result, make_float3( 1.0f), 0.075f ); + + // Record results in our output raster + params.image[idx.y * params.image_width + idx.x] = make_color( result ); +} + + +extern "C" __global__ void __miss__ms() +{ + MissData* miss_data = reinterpret_cast( optixGetSbtDataPointer() ); + setPayloadColor( miss_data->bg_color ); +} + + +extern "C" __global__ void __closesthit__ch() +{ + // When built-in triangle intersection is used, a number of fundamental + // attributes are provided by the OptiX API, including barycentric coordinates. + const float2 barycentrics = optixGetTriangleBarycentrics(); + + setPayloadColor( make_float3( barycentrics*0.5f, 0.5f ) ); +} + + +extern "C" __global__ void __anyhit__opacity() +{ + setPayloadAnyhit( 1u ); // Register that anyhit was invoked + + const HitGroupData* rt_data = reinterpret_cast( optixGetSbtDataPointer() ); + const float2 barycentrics = optixGetTriangleBarycentrics(); + const int prim_idx = optixGetPrimitiveIndex(); + + const float2 uv0 = rt_data->uvs[ prim_idx*3 + 0 ]; + const float2 uv1 = rt_data->uvs[ prim_idx*3 + 1 ]; + const float2 uv2 = rt_data->uvs[ prim_idx*3 + 2 ]; + //printf("AH: uv0: (%f, %f) uv1: (%f, %f) uv2: (%f, %f)\n", uv0.x, uv0.y, uv1.x, uv1.y, uv2.x, uv2.y); + const float2 uv = computeUV( barycentrics, uv0, uv1, uv2 ); + + if( inCircle( uv ) ) + optixIgnoreIntersection(); +} diff --git a/examples/cuda/opacity_micromap.h b/examples/cuda/opacity_micromap.h new file mode 100644 index 0000000..290048e --- /dev/null +++ b/examples/cuda/opacity_micromap.h @@ -0,0 +1,82 @@ +// +// Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "vec_math.h" + +constexpr float CIRCLE_RADIUS = 0.75f; + +//----------------------------------------------------------------------------- +// +// Helper functions to be used when pre-baking opacity into OMM and when +// evaluating opacity within anyhit function +// +//----------------------------------------------------------------------------- + +static __host__ __device__ __inline__ float2 computeUV( float2 bary, float2 uv0, float2 uv1, float2 uv2 ) +{ + return ( 1.0f - bary.x - bary.y )*uv0 + bary.x*uv1 + bary.y*uv2; +} + +static __host__ __device__ __inline__ bool inCircle( const float2 uv ) +{ + return ( uv.x * uv.x + uv.y * uv.y ) < ( CIRCLE_RADIUS * CIRCLE_RADIUS ); +}; + + +//----------------------------------------------------------------------------- +// +// Types +// +//----------------------------------------------------------------------------- +struct Params +{ + uchar4* image; + unsigned int image_width; + unsigned int image_height; + float3 cam_eye; + float3 cam_u, cam_v, cam_w; + OptixTraversableHandle handle; +}; + + +struct RayGenData +{ + // No data needed +}; + + +struct MissData +{ + float3 bg_color; +}; + + +struct HitGroupData +{ + float2* uvs; +}; diff --git a/examples/opacity_micromap.py b/examples/opacity_micromap.py new file mode 100644 index 0000000..8478e93 --- /dev/null +++ b/examples/opacity_micromap.py @@ -0,0 +1,308 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# ----------------------------------------------------------------------------- +# +# A simple demonstration of opacity micromaps. +# +# * A single quad, made of two triangles ABC and ACD is rendered with a +# transparent circular cutout at its center. +# * OMMs are applied to the two triangles to accelerate the evaluation of the +# opacity function during traversal. +# * As a preproces, OMM microtriangles are marked as either completely +# transparent, completely opaque, or unknown. +# * During traversal, rays that hit opaque or transparent regions of the OMM +# can skip the anyhit function. +# * Rays that hit 'unknown' regions of the OMM evaluate the anyhit to get +# accurate evaluation of the opacity function. +# * Regions of the micromap which are unknown are tinted a lighter color to +# visualize the regions which required anyhit evaluation. +# +# ----------------------------------------------------------------------------- + + +import os, sys, logging, collections + +import cupy as cp +import numpy as np +import optix as ox +from sutil.camera import Camera +from PIL import Image, ImageOps + +script_dir = os.path.dirname(os.path.abspath(__file__)) +cuda_src = os.path.join(script_dir, "cuda", "opacity_micromap.cu") + + +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +log = logging.getLogger() + +DEBUG = False + +OMM_SUBDIV_LEVEL = 4 +NUM_TRIS = 2 +DEFAULT_WIDTH = 1024 +DEFAULT_HEIGHT = 768 +CIRCLE_RADIUS = 0.75 + +g_uvs = np.array([[[1.0, -1.0], [-1.0, -1.0], [-1.0, 1.0]], + [[1.0, -1.0], [-1.0, 1.0], [1.0, 1.0]]], dtype=np.float32) + +d_uvs = cp.asarray(g_uvs) + +vertices = np.array([[-0.5, -0.5, 0.0], + [ 0.5, -0.5, 0.0], + [ 0.5, 0.5, 0.0], + + [-0.5, -0.5, 0.0], + [ 0.5, 0.5, 0.0], + [-0.5, 0.5, 0.0]], dtype=np.float32) + + +class Params: + _params = collections.OrderedDict([ + ('image', 'u8'), + ('image_width', 'u4'), + ('image_height', 'u4'), + ('cam_eye', '3f4'), + ('camera_u', '3f4'), + ('camera_v', '3f4'), + ('camera_w', '3f4'), + ('trav_handle', 'u8'), + ]) + + def __init__(self): + self.handle = ox.LaunchParamsRecord(names=tuple(self._params.keys()), + formats=tuple(self._params.values())) + + def __getattribute__(self, name): + if name in Params._params.keys(): + return self.__dict__['handle'][name] + else: + return super().__getattribute__(name) + + def __setattr__(self, name, value): + if name in Params._params.keys(): + self.handle[name] = value + elif name in {'handle'}: + super().__setattr__(name, value) + else: + raise AttributeError(name) + + def __str__(self): + return '\n'.join(f'{k}: {self.handle[k]}' for k in self._params) + + +##------------------------------------------------------------------------------ +## +## Helper Functions +## +##------------------------------------------------------------------------------ + + +def init_camera(params): + camera = Camera() + camera.eye = (0, 0, 1.5) + camera.look_at = (0, 0, 0) + camera.up = (0, 1, 3) + camera.fov_y = 45 + camera.aspect_ratio = params.image_width / params.image_height + + u, v, w = camera.uvw_frame() + params.camera_u = u + params.camera_v = v + params.camera_w = w + params.cam_eye = camera.eye + + +def compute_uv(bary, uv0, uv1, uv2): + bary = bary[np.newaxis, :, np.newaxis] + return (1.0 - bary[..., 0] - bary[..., 1]) * uv0[:, np.newaxis] + bary[..., 0] * uv1[:, np.newaxis] + bary[..., 1] * uv2[:, np.newaxis] + + +def in_circle(uv, radius): + return (uv[..., 0] * uv[..., 0] + uv[..., 1] * uv[..., 1]) < (radius * radius) + + +def evaluate_opacity(bary0, bary1, bary2, uvs, radius): + """ + Calculate the texture coordinate at the micromesh vertices of the triangle and + determine if the triangle is inside, outside, or spanning the boundary of the circle. + Note that the tex coords are in [-1, 1] and the circle is centered at uv=(0,0). + """ + + uv0 = compute_uv(bary0, uvs[:, 0, :], uvs[:, 1, :], uvs[:, 2, :]) + uv1 = compute_uv(bary1, uvs[:, 0, :], uvs[:, 1, :], uvs[:, 2, :]) + uv2 = compute_uv(bary2, uvs[:, 0, :], uvs[:, 1, :], uvs[:, 2, :]) + + in_circle0 = in_circle(uv0, radius) + in_circle1 = in_circle(uv1, radius) + in_circle2 = in_circle(uv2, radius) + + opacity = np.full_like(in_circle0, dtype=np.uint8, fill_value=ox.OpacityMicromapState.UNKNOWN_OPAQUE) + + transparent = np.logical_and.reduce((in_circle0, in_circle1, in_circle2)) + opaque = np.logical_and.reduce((~in_circle0, ~in_circle1, ~in_circle2)) + opacity[transparent] = ox.OpacityMicromapState.TRANSPARENT + opacity[opaque] = ox.OpacityMicromapState.OPAQUE + + return opacity + + +def create_opacity_micromap(ctx): + NUM_MICRO_TRIS = 1 << (OMM_SUBDIV_LEVEL * 2) + + # this has to be compressed later + uTriIs = np.arange(0, NUM_MICRO_TRIS, dtype=np.uint32) + + bary0, bary1, bary2 = ox.micromap_indices_to_base_barycentrics(uTriIs, OMM_SUBDIV_LEVEL) + omm_input_data = evaluate_opacity(bary0, bary1, bary2, g_uvs, CIRCLE_RADIUS) + + # construct the omm input from the data array (this will also bake the data into the format required by optix) + omm_input = ox.OpacityMicromapInput(omm_input_data, format=ox.OpacityMicromapFormat.FOUR_STATE) + omm = ox.OpacityMicromapArray(ctx, omm_input) + return omm + + +def create_acceleration_structure(ctx, vertices, omm): + usage_counts = [2] + index_buffer = np.array([0, 1], dtype=np.uint16) + omm_build_input = ox.BuildInputOpacityMicromap(omm, usage_counts, + ox.OpacityMicromapArrayIndexingMode.INDEXED, + index_buffer=index_buffer) + triangle_input = ox.BuildInputTriangleArray(vertices, + flags=[ox.GeometryFlags.NONE], + opacity_micromap=omm_build_input) + gas = ox.AccelerationStructure(ctx, triangle_input, compact=True) + return gas + + +def create_module(ctx, pipeline_opts): + compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.NONE, opt_level=ox.CompileOptimizationLevel.LEVEL_3) + module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts) + return module + + +def create_program_groups(ctx, module): + raygen_grp = ox.ProgramGroup.create_raygen(ctx, module, "__raygen__rg") + miss_grp = ox.ProgramGroup.create_miss(ctx, module, "__miss__ms") + hit_grp = ox.ProgramGroup.create_hitgroup(ctx, module, + entry_function_CH="__closesthit__ch", + entry_function_AH="__anyhit__opacity") + return raygen_grp, miss_grp, hit_grp + + +def create_pipeline(ctx, program_grps, pipeline_options): + link_opts = ox.PipelineLinkOptions(max_trace_depth=1, + debug_level=ox.CompileDebugLevel.NONE) + + pipeline = ox.Pipeline(ctx, + compile_options=pipeline_options, + link_options=link_opts, + program_groups=program_grps) + pipeline.compute_stack_sizes(1, # max_trace_depth + 0, # max_cc_depth + 0) # max_dc_depth + + return pipeline + + +def create_sbt(program_grps): + raygen_grp, miss_grp, hit_grp = program_grps + + raygen_sbt = ox.SbtRecord(raygen_grp) + miss_sbt = ox.SbtRecord(miss_grp, names=('bg_color',), formats=('3f4',)) + miss_sbt['bg_color'] = [0.01, 0.01, 0.01] + + hit_sbt = ox.SbtRecord(hit_grp, names=('uvs',), formats=('u8',)) + hit_sbt["uvs"] = d_uvs.data.ptr + + sbt = ox.ShaderBindingTable(raygen_record=raygen_sbt, miss_records=miss_sbt, hitgroup_records=hit_sbt) + + return sbt + + +def launch_pipeline(pipeline: ox.Pipeline, sbt, params): + img_size = (params.image_width[0], params.image_height[0]) + output_image = np.zeros(img_size + (4, ), 'B') + output_image = cp.asarray(output_image) + + params.image = output_image.data.ptr + + stream = cp.cuda.Stream() + pipeline.launch(sbt, dimensions=img_size, params=params.handle, stream=stream) + + stream.synchronize() + return cp.asnumpy(output_image) + + +if __name__ == "__main__": + logger = ox.Logger(log) + ctx = ox.DeviceContext(validation_mode=True, + log_callback_function=logger, + log_callback_level=4) + ctx.cache_enabled = False + + params = Params() + params.image_width = DEFAULT_WIDTH + params.image_height = DEFAULT_HEIGHT + + init_camera(params) + + opacity_micromap = create_opacity_micromap(ctx) + gas = create_acceleration_structure(ctx, vertices, opacity_micromap) + params.trav_handle = gas.handle + + pipeline_options = ox.PipelineCompileOptions(traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_GAS, + num_payload_values=4, + num_attribute_values=2, + uses_motion_blur=False, + exception_flags=ox.ExceptionFlags.NONE, + uses_primitive_type_flags=ox.PrimitiveTypeFlags.TRIANGLE, + pipeline_launch_params_variable_name="params", + allow_opacity_micromaps=True) + + module = create_module(ctx, pipeline_options) + program_grps = create_program_groups(ctx, module) + pipeline = create_pipeline(ctx, program_grps, pipeline_options) + + sbt = create_sbt(program_grps) + + img = launch_pipeline(pipeline, sbt, params) + + img = img.reshape(params.image_height[0], params.image_width[0], 4) + img = ImageOps.flip(Image.fromarray(img, 'RGBA')) + img.show() + + + + + + + + + diff --git a/optix/__init__.py b/optix/__init__.py index 9a364ce..41d1015 100644 --- a/optix/__init__.py +++ b/optix/__init__.py @@ -1,12 +1,12 @@ -from .context import DeviceContext, optix_version +from .context import * from .build import * -from .module import Module, ModuleCompileOptions, CompileOptimizationLevel, CompileDebugLevel, PayloadSemantics, Task -from .program_group import ProgramGroup -from .struct import SbtRecord, LaunchParamsRecord -from .shader_binding_table import ShaderBindingTable -from .pipeline import CompileDebugLevel, ExceptionFlags, TraversableGraphFlags, \ - PrimitiveTypeFlags, PipelineCompileOptions, PipelineLinkOptions, Pipeline +from .module import * +from .program_group import * +from .struct import * +from .shader_binding_table import * +from .pipeline import * from .denoiser import * +from .micromap import * from .logging_utility import Logger from ._version import __version__ diff --git a/optix/base.pyx b/optix/base.pyx index 0173530..d9fc39a 100644 --- a/optix/base.pyx +++ b/optix/base.pyx @@ -9,4 +9,4 @@ cdef class OptixObject: return "" def __repr__(self): - return f"" \ No newline at end of file + return f"optix.{self.__class__.__name__}({self._repr_details()})" \ No newline at end of file diff --git a/optix/build.pxd b/optix/build.pxd index 457c1d7..74570bb 100644 --- a/optix/build.pxd +++ b/optix/build.pxd @@ -2,7 +2,8 @@ from .common cimport OptixResult, CUstream, CUdeviceptr from .context cimport OptixDeviceContext, OptixContextObject from libcpp.vector cimport vector from .base cimport OptixObject -from libc.stdint cimport uintptr_t +from libc.stdint cimport uintptr_t, uint32_t +from .micromap cimport OptixBuildInputOpacityMicromap, BuildInputOpacityMicromap cdef extern from "optix.h" nogil: @@ -14,6 +15,8 @@ cdef extern from "optix.h" nogil: OPTIX_BUILD_FLAG_PREFER_FAST_BUILD, OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS, OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS, + OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE, + OPTIX_BUILD_FLAG_ALLOW_DISABLE_OPACITY_MICROMAPS cdef enum OptixBuildOperation: @@ -45,9 +48,9 @@ cdef extern from "optix.h" nogil: OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES, OPTIX_BUILD_INPUT_TYPE_INSTANCES, OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS, - OPTIX_BUILD_INPUT_TYPE_CURVES + OPTIX_BUILD_INPUT_TYPE_CURVES, + OPTIX_BUILD_INPUT_TYPE_SPHERES - #ctypedef uintptr_t CUdeviceptr cdef struct OptixBuildInputInstanceArray: CUdeviceptr instances @@ -75,68 +78,36 @@ cdef extern from "optix.h" nogil: unsigned int primitiveIndexOffset - IF _OPTIX_VERSION_MAJOR == 7 and _OPTIX_VERSION_MINOR > 4: # switch to new instance flags - cdef enum OptixPrimitiveType: - OPTIX_PRIMITIVE_TYPE_CUSTOM, - OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, - OPTIX_PRIMITIVE_TYPE_SPHERE, - OPTIX_PRIMITIVE_TYPE_TRIANGLE, - ELIF _OPTIX_VERSION_MAJOR == 7 and _OPTIX_VERSION_MINOR > 3: - cdef enum OptixPrimitiveType: - OPTIX_PRIMITIVE_TYPE_CUSTOM, - OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, - OPTIX_PRIMITIVE_TYPE_TRIANGLE, - ELSE: - cdef enum OptixPrimitiveType: - OPTIX_PRIMITIVE_TYPE_CUSTOM, - OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_TRIANGLE, - - - IF _OPTIX_VERSION_MAJOR == 7 and _OPTIX_VERSION_MINOR > 3: # switch to new instance flags - cdef enum OptixCurveEndcapFlags: - OPTIX_CURVE_ENDCAP_DEFAULT, - OPTIX_CURVE_ENDCAP_ON - - - cdef struct OptixBuildInputCurveArray: - OptixPrimitiveType curveType - unsigned int numPrimitives - const CUdeviceptr * vertexBuffers - unsigned int numVertices - unsigned int vertexStrideInBytes - const CUdeviceptr * widthBuffers - unsigned int widthStrideInBytes - const CUdeviceptr * normalBuffers - unsigned int normalStrideInBytes - CUdeviceptr indexBuffer - unsigned int indexStrideInBytes - unsigned int flag - unsigned int primitiveIndexOffset - unsigned int endcapFlags - ELSE: - cdef struct OptixBuildInputCurveArray: - OptixPrimitiveType curveType - unsigned int numPrimitives - const CUdeviceptr * vertexBuffers - unsigned int numVertices - unsigned int vertexStrideInBytes - const CUdeviceptr * widthBuffers - unsigned int widthStrideInBytes - const CUdeviceptr * normalBuffers - unsigned int normalStrideInBytes - CUdeviceptr indexBuffer - unsigned int indexStrideInBytes - unsigned int flag - unsigned int primitiveIndexOffset + cdef enum OptixPrimitiveType: + OPTIX_PRIMITIVE_TYPE_CUSTOM, + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, + OPTIX_PRIMITIVE_TYPE_SPHERE, + OPTIX_PRIMITIVE_TYPE_TRIANGLE, + + + cdef enum OptixCurveEndcapFlags: + OPTIX_CURVE_ENDCAP_DEFAULT, + OPTIX_CURVE_ENDCAP_ON + + + cdef struct OptixBuildInputCurveArray: + OptixPrimitiveType curveType + unsigned int numPrimitives + const CUdeviceptr * vertexBuffers + unsigned int numVertices + unsigned int vertexStrideInBytes + const CUdeviceptr * widthBuffers + unsigned int widthStrideInBytes + const CUdeviceptr * normalBuffers + unsigned int normalStrideInBytes + CUdeviceptr indexBuffer + unsigned int indexStrideInBytes + unsigned int flag + unsigned int primitiveIndexOffset + unsigned int endcapFlags cdef enum OptixIndicesFormat: @@ -160,17 +131,12 @@ cdef extern from "optix.h" nogil: OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12, - IF _OPTIX_VERSION > 70400: # switch to new geometry flags - cdef enum OptixGeometryFlags: - OPTIX_GEOMETRY_FLAG_NONE, - OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT, - OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL - OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING - ELSE: - cdef enum OptixGeometryFlags: - OPTIX_GEOMETRY_FLAG_NONE, - OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT, - OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL + cdef enum OptixGeometryFlags: + OPTIX_GEOMETRY_FLAG_NONE, + OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT, + OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL + OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING + cdef struct OptixBuildInputTriangleArray: const CUdeviceptr * vertexBuffers @@ -189,39 +155,32 @@ cdef extern from "optix.h" nogil: unsigned int sbtIndexOffsetStrideInBytes unsigned int primitiveIndexOffset OptixTransformFormat transformFormat + OptixBuildInputOpacityMicromap opacityMicromap - IF _OPTIX_VERSION > 70400: # switch to new geometry flags - cdef struct OptixBuildInputSphereArray: - const CUdeviceptr* vertexBuffers - unsigned int vertexStrideInBytes - unsigned int numVertices - const CUdeviceptr *radiusBuffers - unsigned int radiusStrideInBytes - int singleRadius - const unsigned int *flags - unsigned int numSbtRecords - CUdeviceptr sbtIndexOffsetBuffer - unsigned int sbtIndexOffsetSizeInBytes - unsigned int sbtIndexOffsetStrideInBytes - unsigned int primitiveIndexOffset - - cdef struct OptixBuildInput: - OptixBuildInputType type - # union - OptixBuildInputTriangleArray triangleArray - OptixBuildInputCurveArray curveArray - OptixBuildInputSphereArray sphereArray - OptixBuildInputCustomPrimitiveArray customPrimitiveArray - OptixBuildInputInstanceArray instanceArray - ELSE: - cdef struct OptixBuildInput: - OptixBuildInputType type - # union - OptixBuildInputTriangleArray triangleArray - OptixBuildInputCurveArray curveArray - OptixBuildInputCustomPrimitiveArray customPrimitiveArray - OptixBuildInputInstanceArray instanceArray + cdef struct OptixBuildInputSphereArray: + const CUdeviceptr* vertexBuffers + unsigned int vertexStrideInBytes + unsigned int numVertices + const CUdeviceptr *radiusBuffers + unsigned int radiusStrideInBytes + int singleRadius + const unsigned int *flags + unsigned int numSbtRecords + CUdeviceptr sbtIndexOffsetBuffer + unsigned int sbtIndexOffsetSizeInBytes + unsigned int sbtIndexOffsetStrideInBytes + unsigned int primitiveIndexOffset + + + cdef struct OptixBuildInput: + OptixBuildInputType type + # union + OptixBuildInputTriangleArray triangleArray + OptixBuildInputCurveArray curveArray + OptixBuildInputSphereArray sphereArray + OptixBuildInputCustomPrimitiveArray customPrimitiveArray + OptixBuildInputInstanceArray instanceArray cdef struct OptixAccelBufferSizes: @@ -243,7 +202,7 @@ cdef extern from "optix.h" nogil: ctypedef uintptr_t OptixTraversableHandle - cdef struct OptixAccelRelocationInfo: + cdef struct OptixRelocationInfo: unsigned long long info[4] @@ -259,6 +218,8 @@ cdef extern from "optix.h" nogil: OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT + OPTIX_INSTANCE_FLAG_FORCE_OPACITY_MICROMAP_2_STATE + OPTIX_INSTANCE_FLAG_DISABLE_OPACITY_MICROMAPS cdef struct OptixInstance: @@ -269,6 +230,27 @@ cdef extern from "optix.h" nogil: unsigned int flags OptixTraversableHandle traversableHandle + + cdef struct OptixRelocateInputInstanceArray: + unsigned int numInstances + CUdeviceptr traversableHandles + + + cdef struct OptixRelocateInputOpacityMicromap: + CUdeviceptr opacityMicromapArray + + + cdef struct OptixRelocateInputTriangleArray: + unsigned int numSbtRecords + OptixRelocateInputOpacityMicromap opacityMicromap + + + cdef struct OptixRelocateInput: + OptixBuildInputType type + OptixRelocateInputInstanceArray instanceArray + OptixRelocateInputTriangleArray triangleArray + + OptixResult optixAccelComputeMemoryUsage(OptixDeviceContext context, const OptixAccelBuildOptions * accelOptions, const OptixBuildInput * buildInputs, @@ -291,6 +273,7 @@ cdef extern from "optix.h" nogil: unsigned int numEmittedProperties ) + OptixResult optixAccelCompact(OptixDeviceContext context, CUstream stream, OptixTraversableHandle inputHandle, @@ -299,33 +282,39 @@ cdef extern from "optix.h" nogil: OptixTraversableHandle * outputHandle ) + OptixResult optixAccelRelocate(OptixDeviceContext context, CUstream stream, - const OptixAccelRelocationInfo * info, - CUdeviceptr instanceTraversableHandles, - size_t numInstanceTraversableHandles, + const OptixRelocationInfo * info, + const OptixRelocateInput * relocateInputs, + size_t numRelocateInputs, CUdeviceptr targetAccel, size_t targetAccelSizeInBytes, OptixTraversableHandle * targetHandle ) - OptixResult optixAccelCheckRelocationCompatibility(OptixDeviceContext context, - const OptixAccelRelocationInfo * info, + + OptixResult optixCheckRelocationCompatibility(OptixDeviceContext context, + const OptixRelocationInfo * info, int * compatible ) + OptixResult optixAccelGetRelocationInfo(OptixDeviceContext context, OptixTraversableHandle handle, - OptixAccelRelocationInfo * info + OptixRelocationInfo * info ) + OptixResult optixConvertPointerToTraversableHandle(OptixDeviceContext onDevice, CUdeviceptr pointer, OptixTraversableType traversableType, OptixTraversableHandle * traversableHandle ) + cdef class BuildInputArray(OptixObject): + cdef OptixBuildInputType build_input_type cdef void prepare_build_input(self, OptixBuildInput* build_input) except * cdef size_t num_elements(self) @@ -338,6 +327,7 @@ cdef class BuildInputTriangleArray(BuildInputArray): cdef object _d_sbt_offset_buffer cdef object _d_pre_transform cdef vector[unsigned int] _flags + cdef BuildInputOpacityMicromap c_opacity_micromap cdef class BuildInputCustomPrimitiveArray(BuildInputArray): @@ -360,15 +350,14 @@ cdef class BuildInputCurveArray(BuildInputArray): cdef object _d_index_buffer -IF _OPTIX_VERSION > 70400: - cdef class BuildInputSphereArray(BuildInputArray): - cdef OptixBuildInputSphereArray build_input - cdef list _d_vertex_buffers - cdef vector[CUdeviceptr] _d_vertex_buffer_ptrs - cdef list _d_radius_buffers - cdef vector[CUdeviceptr] _d_radius_buffer_ptrs - cdef object _d_sbt_offset_buffer - cdef vector[unsigned int] _flags +cdef class BuildInputSphereArray(BuildInputArray): + cdef OptixBuildInputSphereArray build_input + cdef list _d_vertex_buffers + cdef vector[CUdeviceptr] _d_vertex_buffer_ptrs + cdef list _d_radius_buffers + cdef vector[CUdeviceptr] _d_radius_buffer_ptrs + cdef object _d_sbt_offset_buffer + cdef vector[unsigned int] _flags cdef class Instance(OptixObject): @@ -389,6 +378,8 @@ cdef class AccelerationStructure(OptixContextObject): cdef OptixAccelBufferSizes _buffer_sizes cdef object _instances cdef OptixTraversableHandle _handle + cdef list _relocate_deps + cdef void _init_build_inputs(self, build_inputs, vector[OptixBuildInput]& ret) cdef void _init_accel_options(self, size_t num_build_inputs, unsigned int build_flags, OptixBuildOperation operation, vector[OptixAccelBuildOptions]& ret) cdef void build(self, build_inputs, stream=*) diff --git a/optix/build.pyx b/optix/build.pyx index ed2593f..a4d9fa0 100644 --- a/optix/build.pyx +++ b/optix/build.pyx @@ -8,6 +8,8 @@ from enum import IntEnum, IntFlag from libc.string cimport memcpy, memset from libcpp.vector cimport vector from .common import round_up, ensure_iterable +import typing as typ +from .micromap cimport BuildInputOpacityMicromap, OpacityMicromapArray optix_init() @@ -17,10 +19,11 @@ __all__ = ['GeometryFlags', 'BuildInputTriangleArray', 'BuildInputCustomPrimitiveArray', 'BuildInputCurveArray', + 'BuildInputSphereArray', 'BuildInputInstanceArray', 'Instance', 'AccelerationStructure', - 'CurveEndcapFlags' + 'CurveEndcapFlags', ] @@ -31,9 +34,7 @@ class GeometryFlags(IntEnum): NONE = OPTIX_GEOMETRY_FLAG_NONE, DISABLE_ANYHIT = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT, REQUIRE_SINGLE_ANYHIT_CALL = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL - - IF _OPTIX_VERSION_MAJOR == 7 and _OPTIX_VERSION_MINOR > 4: - DISABLE_TRIANGLE_FACE_CULLING = OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING + DISABLE_TRIANGLE_FACE_CULLING = OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING class BuildFlags(IntFlag): @@ -57,21 +58,14 @@ class PrimitiveType(IntEnum): ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR - - IF _OPTIX_VERSION > 70300: # switch to new instance flags - ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM - IF _OPTIX_VERSION > 70400: # switch to new instance flags - SPHERE = OPTIX_PRIMITIVE_TYPE_SPHERE - + ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM + SPHERE = OPTIX_PRIMITIVE_TYPE_SPHERE TRIANGLE = OPTIX_PRIMITIVE_TYPE_TRIANGLE class CurveEndcapFlags(IntEnum): - IF _OPTIX_VERSION > 70300: # switch to new instance flags - DEFAULT = OPTIX_CURVE_ENDCAP_DEFAULT, - ON = OPTIX_CURVE_ENDCAP_ON - ELSE: - DEFAULT = 0 # only for interface. Ignored for Optix versions below 7.4 + DEFAULT = OPTIX_CURVE_ENDCAP_DEFAULT, + ON = OPTIX_CURVE_ENDCAP_ON class InstanceFlags(IntFlag): @@ -83,18 +77,36 @@ class InstanceFlags(IntFlag): FLIP_TRIANGLE_FACING = OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING, DISABLE_ANYHIT = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT, ENFORCE_ANYHIT = OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT, + FORCE_OPACITY_MICROMAP_2_STATE = OPTIX_INSTANCE_FLAG_FORCE_OPACITY_MICROMAP_2_STATE, + DISABLE_OPACITY_MICROMAPS = OPTIX_INSTANCE_FLAG_DISABLE_OPACITY_MICROMAPS + + +class BuildInputType(IntEnum): + TRIANGLES = OPTIX_BUILD_INPUT_TYPE_TRIANGLES, + CUSTOM_PRIMITIVES = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES, + INSTANCES = OPTIX_BUILD_INPUT_TYPE_INSTANCES, + INSTANCE_POINTERS = OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS, + CURVES = OPTIX_BUILD_INPUT_TYPE_CURVES, + SPHERES = OPTIX_BUILD_INPUT_TYPE_SPHERES cdef class BuildInputArray(OptixObject): """ Base class for all BuildInput Arrays. This is an internal class. """ + def __init__(self, type): + self.build_input_type = (BuildInputType(type).value) + cdef void prepare_build_input(self, OptixBuildInput* build_input) except *: pass cdef size_t num_elements(self): return 0 + @property + def type(self): + return BuildInputType(self.build_input_type) + cdef class BuildInputTriangleArray(BuildInputArray): """ @@ -127,9 +139,9 @@ cdef class BuildInputTriangleArray(BuildInputArray): flags = None, sbt_record_offset_buffer = None, pre_transform = None, - primitive_index_offset = 0 - ): - + primitive_index_offset = 0, + opacity_micromap: typ.Optional[BuildInputOpacityMicromap] = None): + super().__init__(BuildInputType.TRIANGLES) self._d_vertex_buffers = [cp.asarray(vb) for vb in ensure_iterable(vertex_buffers)] self._d_vertex_buffer_ptrs.reserve(len(self._d_vertex_buffers)) @@ -163,6 +175,7 @@ cdef class BuildInputTriangleArray(BuildInputArray): self.build_input.indexStrideInBytes = 0 self.build_input.numIndexTriplets = 0 self.build_input.indexBuffer = 0 + self._d_index_buffer = None self.build_input.numSbtRecords = num_sbt_records @@ -200,11 +213,16 @@ cdef class BuildInputTriangleArray(BuildInputArray): self.build_input.preTransform = 0 self.build_input.transformFormat = OPTIX_TRANSFORM_FORMAT_NONE + self.c_opacity_micromap = opacity_micromap + if self.c_opacity_micromap is not None: + self.build_input.opacityMicromap = self.c_opacity_micromap.build_input + + def __dealloc__(self): pass cdef void prepare_build_input(self, OptixBuildInput* build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES + build_input.type = self.build_input_type build_input.triangleArray = self.build_input def _vertex_format(self, dtype, shape): @@ -238,6 +256,19 @@ cdef class BuildInputTriangleArray(BuildInputArray): cdef size_t num_elements(self): return self.build_input.numVertices + @property + def micromap(self): + return self.c_opacity_micromap + + @property + def num_sbt_records(self): + return self.build_input.numSbtRecords + + + def _repr_details(self): + return f"nvertices={self.num_elements()}, " \ + f"ntriangles={self._d_index_buffer.shape[0] if self._d_index_buffer is not None else self.num_elements() // 3}, " \ + f"n_sbt_records={self.build_input.numSbtRecords}" cdef class BuildInputCustomPrimitiveArray(BuildInputArray): """ @@ -267,6 +298,7 @@ cdef class BuildInputCustomPrimitiveArray(BuildInputArray): sbt_record_offset_buffer = None, primitive_index_offset = 0 ): + super().__init__(BuildInputType.CUSTOM_PRIMITIVES) self._d_aabb_buffers = [cp.asarray(ab, dtype=np.float32).reshape(-1, 6) for ab in aabb_buffers] self._d_aabb_buffer_ptrs.reserve(len(self._d_aabb_buffers)) @@ -317,7 +349,7 @@ cdef class BuildInputCustomPrimitiveArray(BuildInputArray): self.build_input.primitiveIndexOffset = primitive_index_offset cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES + build_input.type = self.build_input_type build_input.customPrimitiveArray = self.build_input cdef size_t num_elements(self): @@ -358,7 +390,7 @@ cdef class BuildInputCurveArray(BuildInputArray): flags=None, primitive_index_offset=0, endcap_flags=CurveEndcapFlags.DEFAULT): - + super().__init__(BuildInputType.CURVES) self.build_input.curveType = curve_type.value self._d_vertex_buffers = [cp.asarray(vb, np.float32) for vb in ensure_iterable(vertex_buffers)] self._d_vertex_buffer_ptrs.reserve(len(self._d_vertex_buffers)) @@ -417,119 +449,123 @@ cdef class BuildInputCurveArray(BuildInputArray): self.build_input.primitiveIndexOffset = primitive_index_offset - IF _OPTIX_VERSION > 70300: - self.build_input.endcapFlags = endcap_flags # only for Optix versions >= 7.4 + self.build_input.endcapFlags = endcap_flags cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES + build_input.type = self.build_input_type build_input.curveArray = self.build_input cdef size_t num_elements(self): return self.build_input.numPrimitives -IF _OPTIX_VERSION > 70400: - cdef class BuildInputSphereArray(BuildInputArray): +cdef class BuildInputSphereArray(BuildInputArray): + """ + BuildInputArray for a sphere. This class wraps the OptixBuildInputSphereArray struct. + In Contrast to the behavior of the Optix C++ API, this Python class will automatically convert all numpy.ndarrays + to cupy.ndarrays and keep track of them. + + Parameters + ---------- + vertex_buffers: + List of vertex buffers (one for each motion step) or a single array. + All arrays will be converted to cupy.ndarrays before any further processing. + index_buffer: ndarray, optional + A single 2d array containing the indices of all triangles or None + num_sbt_records: int + The number of records in the ShaderBindingTable for this geometry + flags: GeometryFlags + Flags to use in this input for each motionstep + sbt_record_offset_buffer: ndarray, optional + Offsets into the ShaderBindingTable record for each primitive (index) or None + pre_transform: ndarray(3,4) or None + A transform to apply prior to processing + primitive_index_offset: int + The offset applied to the primitive index in device code """ - BuildInputArray for a sphere. This class wraps the OptixBuildInputSphereArray struct. - In Contrast to the behavior of the Optix C++ API, this Python class will automatically convert all numpy.ndarrays - to cupy.ndarrays and keep track of them. - - Parameters - ---------- - vertex_buffers: - List of vertex buffers (one for each motion step) or a single array. - All arrays will be converted to cupy.ndarrays before any further processing. - index_buffer: ndarray, optional - A single 2d array containing the indices of all triangles or None - num_sbt_records: int - The number of records in the ShaderBindingTable for this geometry - flags: GeometryFlags - Flags to use in this input for each motionstep - sbt_record_offset_buffer: ndarray, optional - Offsets into the ShaderBindingTable record for each primitive (index) or None - pre_transform: ndarray(3,4) or None - A transform to apply prior to processing - primitive_index_offset: int - The offset applied to the primitive index in device code - """ - def __init__(self, - vertex_buffers, - radius_buffers, - num_sbt_records = 1, - flags = None, - sbt_record_offset_buffer = None, - pre_transform = None, - primitive_index_offset = 0 - ): - - self._d_vertex_buffers = [cp.asarray(vb) for vb in ensure_iterable(vertex_buffers)] - self._d_vertex_buffer_ptrs.reserve(len(self._d_vertex_buffers)) - - self._d_radius_buffers = [cp.asarray(vb) for vb in ensure_iterable(radius_buffers)] - self._d_radius_buffer_ptrs.reserve(len(self._d_radius_buffers)) - - if len(self._d_radius_buffers) != len(self._d_vertex_buffers): - raise ValueError("Argument radius_buffers must have the same number of arrays as vertex_buffers.") - - if len(self._d_vertex_buffers) == 0: - raise ValueError("BuildInputSphereArray cannot be empty.") + def __init__(self, + vertex_buffers, + radius_buffers, + num_sbt_records = 1, + flags = None, + sbt_record_offset_buffer = None, + pre_transform = None, + primitive_index_offset = 0 + ): + super().__init__(BuildInputType.SPHERES) + self._d_vertex_buffers = [cp.asarray(vb) for vb in ensure_iterable(vertex_buffers)] + self._d_vertex_buffer_ptrs.reserve(len(self._d_vertex_buffers)) - dtype = self._d_vertex_buffers[0].dtype - shape = self._d_vertex_buffers[0].shape - strides = self._d_vertex_buffers[0].strides + self._d_radius_buffers = [cp.asarray(vb) for vb in ensure_iterable(radius_buffers)] + self._d_radius_buffer_ptrs.reserve(len(self._d_radius_buffers)) - radius_dtype = self._d_radius_buffers[0].dtype - radius_shape = self._d_radius_buffers[0].shape - strides = self._d_radius_buffers[0].strides + if len(self._d_radius_buffers) != len(self._d_vertex_buffers): + raise ValueError("Argument radius_buffers must have the same number of arrays as vertex_buffers.") - for vb, rb in zip(self._d_vertex_buffers, self._d_radius_buffers): - if vb.dtype != dtype or vb.shape != shape or vb.strides != strides: - raise ValueError("All vertex buffers must have the same size and dtype.") - self._d_vertex_buffer_ptrs.push_back(vb.data.ptr) + if len(self._d_vertex_buffers) == 0: + raise ValueError("BuildInputSphereArray cannot be empty.") - if rb.dtype != dtype or rb.shape != shape or rb.strides != strides: - raise ValueError("All radius buffers must have the same size and dtype.") - self._d_radius_buffer_ptrs.push_back(rb.data.ptr) + dtype = self._d_vertex_buffers[0].dtype + shape = self._d_vertex_buffers[0].shape + strides = self._d_vertex_buffers[0].strides - self.build_input.vertexBuffers = self._d_vertex_buffer_ptrs.const_data() - self.build_input.radiusBuffers = self._d_radius_buffer_ptrs.const_data() + radius_dtype = self._d_radius_buffers[0].dtype + radius_shape = self._d_radius_buffers[0].shape + strides = self._d_radius_buffers[0].strides - self.build_input.vertexStrideInBytes = self._d_vertex_buffers[0].strides[0] - self.build_input.radiusStrideInBytes = self._d_radius_buffers[0].strides[0] + for vb, rb in zip(self._d_vertex_buffers, self._d_radius_buffers): + if vb.dtype != dtype or vb.shape != shape or vb.strides != strides: + raise ValueError("All vertex buffers must have the same size and dtype.") + self._d_vertex_buffer_ptrs.push_back(vb.data.ptr) - self.build_input.numVertices = shape[0] - self.build_input.singleRadius = 1 if self._d_radius_buffers[0].shape[0] == 1 else 0 + if rb.dtype != dtype or rb.shape != shape or rb.strides != strides: + raise ValueError("All radius buffers must have the same size and dtype.") + self._d_radius_buffer_ptrs.push_back(rb.data.ptr) - self.build_input.numSbtRecords = num_sbt_records - self._flags.resize(num_sbt_records) + self.build_input.vertexBuffers = self._d_vertex_buffer_ptrs.const_data() + self.build_input.radiusBuffers = self._d_radius_buffer_ptrs.const_data() - if flags is None: - for i in range(num_sbt_records): - self._flags[i] = OPTIX_GEOMETRY_FLAG_NONE - else: - for i in range(num_sbt_records): - self._flags[i] = flags[i].value + self.build_input.vertexStrideInBytes = self._d_vertex_buffers[0].strides[0] + self.build_input.radiusStrideInBytes = self._d_radius_buffers[0].strides[0] - self.build_input.flags = self._flags.data() + self.build_input.numVertices = shape[0] + self.build_input.singleRadius = 1 if self._d_radius_buffers[0].shape[0] == 1 else 0 + self.build_input.numSbtRecords = num_sbt_records + self._flags.resize(num_sbt_records) - if sbt_record_offset_buffer is not None: - self._d_sbt_offset_buffer = cp.asarray(sbt_record_offset_buffer).ravel() - self.build_input.sbtIndexOffsetBuffer = self._d_sbt_offset_buffer.data.ptr - itemsize = self._d_sbt_offset_buffer.itemsize - if itemsize > 4: - raise ValueError("Only 32 bit allowed at max") - self.build_input.sbtIndexOffsetSizeInBytes = itemsize - self.build_input.sbtIndexOffsetStrideInBytes = self._d_sbt_offset_buffer.strides[0] - else: - self.build_input.sbtIndexOffsetBuffer = 0 - self.build_input.sbtIndexOffsetStrideInBytes = 0 - self.build_input.sbtIndexOffsetSizeInBytes = 0 + if flags is None: + for i in range(num_sbt_records): + self._flags[i] = OPTIX_GEOMETRY_FLAG_NONE + else: + for i in range(num_sbt_records): + self._flags[i] = flags[i].value - self.build_input.primitiveIndexOffset = primitive_index_offset + self.build_input.flags = self._flags.data() + + + if sbt_record_offset_buffer is not None: + self._d_sbt_offset_buffer = cp.asarray(sbt_record_offset_buffer).ravel() + self.build_input.sbtIndexOffsetBuffer = self._d_sbt_offset_buffer.data.ptr + itemsize = self._d_sbt_offset_buffer.itemsize + if itemsize > 4: + raise ValueError("Only 32 bit allowed at max") + self.build_input.sbtIndexOffsetSizeInBytes = itemsize + self.build_input.sbtIndexOffsetStrideInBytes = self._d_sbt_offset_buffer.strides[0] + else: + self.build_input.sbtIndexOffsetBuffer = 0 + self.build_input.sbtIndexOffsetStrideInBytes = 0 + self.build_input.sbtIndexOffsetSizeInBytes = 0 + + self.build_input.primitiveIndexOffset = primitive_index_offset + + cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: + build_input.type = self.build_input_type + build_input.sphereArray = self.build_input + + cdef size_t num_elements(self): + return self.build_input.numVertices - __all__.append('BuildInputSphereArray') cdef class Instance(OptixObject): """ @@ -584,14 +620,17 @@ cdef class Instance(OptixObject): # update the handle as well self.instance.traversableHandle = self.traversable.handle - def __deepcopy__(self, memodict={}): - from copy import deepcopy + def relocate(self, + device: typ.Optional[DeviceContext] = None, + stream: typ.Optional[cp.cuda.Stream] = None): cls = self.__class__ result = cls.__new__(cls) - memodict[id(self)] = result result.instance = self.instance - result.traversable = deepcopy(self.traversable) + result.traversable = self.traversable.relocate(device=device, stream=stream) + def __deepcopy__(self, memo): + result = self.relocate() + memo[id(self)] = result return result @property @@ -617,6 +656,7 @@ cdef class BuildInputInstanceArray(BuildInputArray): A list of the Instances to use as input """ def __init__(self, instances): + super().__init__(BuildInputType.INSTANCES) instances = ensure_iterable(instances) self.instances = instances @@ -633,7 +673,7 @@ cdef class BuildInputInstanceArray(BuildInputArray): self.build_input.numInstances = len(instances) cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES + build_input.type = self.build_input_type build_input.instanceArray = self.build_input cdef size_t num_elements(self): @@ -684,6 +724,92 @@ cdef class BuildInputInstanceArray(BuildInputArray): return cp.ndarray(shape=(3,4), dtype=np.float32, memptr=device_ptr) +cdef class RelocationDependency: + cdef OptixBuildInputType _type + + def __init__(self, type): + self._type = (BuildInputType(type).value) + + @property + def type(self): + return BuildInputType(self._type) + + cdef RelocationDependency relocate(self, device, stream): + return self + + cdef void fill_relocation_input(self, OptixRelocateInput& input): + input.type = self._type + + cdef void finalize_relocation_input(self): + pass + +cdef class RelocationInstanceDependency(RelocationDependency): + cdef object instances + cdef object d_instances + + def __init__(self, instances): + super().__init__(BuildInputType.INSTANCES) + self.instances = instances + self.d_instances = None + + cdef RelocationInstanceDependency relocate(self, device, stream): + relocated_instances = [inst.relocate(device=device, stream=stream) for inst in self.instances] + result = self.__class__(relocated_instances) + return result + + cdef void fill_relocation_input(self, OptixRelocateInput& input): + cdef vector[OptixTraversableHandle] c_instance_handles + cdef ssize_t c_instance_handles_size = 0 + cdef object d_instances + cdef size_t i + cdef CUdeviceptr d_instances_ptr = 0 + cdef vector[OptixRelocateInput] c_relocate_inputs + cdef size_t num_relocate_inputs + + input.type = self.type + input.instanceArray.numInstances = len(self.instances) + + # prepare the new instance handles for relocation by copiing them into a temporary device buffer + c_instance_handles.resize(len(self.instances)) + c_instance_handles_size = sizeof(OptixTraversableHandle) * c_instance_handles.size() + + self.d_instances = cp.cuda.alloc(c_instance_handles_size) + for i in range(c_instance_handles.size()): + c_instance_handles[i] = self.instances[i].traversable.handle + + d_instances_ptr = self.d_instances.ptr + cp.cuda.runtime.memcpy(d_instances_ptr, c_instance_handles.data(), c_instance_handles_size, + cp.cuda.runtime.memcpyHostToDevice) + c_relocate_inputs[0].instanceArray.traversableHandles = d_instances_ptr + input.instanceArray.traversableHandles = d_instances_ptr + + cdef void finalize_relocation_input(self): + self.d_instances = None # remove the temporary cuda buffer again + + +cdef class RelocationTriangleDependency(RelocationDependency): + cdef unsigned int num_sbt_records + cdef OpacityMicromapArray micromap + + def __init__(self, num_sbt_records, micromap=None): + super().__init__(BuildInputType.INSTANCES) + self.num_sbt_records = num_sbt_records + self.micromap = micromap + + cdef RelocationInstanceDependency relocate(self, device, stream): + if self.micromap is not None: + relocated_micromap = self.micromap.relocate(device=device, stream=stream) + else: + relocated_micromap = None + result = self.__class__(self.num_sbt_records, relocated_micromap) + return result + + cdef void fill_relocation_input(self, OptixRelocateInput& input): + input.type = self.type + input.triangleArray.numSbtRecords = self.num_sbt_records + input.triangleArray.opacityMicromap.opacityMicromapArray = self.micromap.d_micromap_array_buffer.ptr + + cdef class AccelerationStructure(OptixContextObject): """ Class representing a Geometry Acceleration Structure (GAS) or Instance Acceleration Structure (IAS). This wraps the OptixTraversableHandle internally and manages the ressources like @@ -706,6 +832,10 @@ cdef class AccelerationStructure(OptixContextObject): Allow for random access of the vertices in triangle geometry random_instance_access: bool Allow for random access of the instances if an IAS is built + allow_opacity_micromap_update: bool + Allows to update the opacity micromaps in this structure + allow_disable_opacity_micromaps: bool + Allows to disable the opacity micromaps for instances in this structure stream: cupy.cuda.Stream, optional Cuda stream to use. If None the default stream is used """ @@ -717,6 +847,8 @@ cdef class AccelerationStructure(OptixContextObject): prefer_fast_build=False, random_vertex_access=False, random_instance_access=False, + allow_opacity_micromap_update=False, + allow_disable_opacity_micromaps=False, stream=None): super().__init__(context) @@ -735,9 +867,15 @@ cdef class AccelerationStructure(OptixContextObject): self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS if random_instance_access: self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS + if allow_opacity_micromap_update: + self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE + if allow_disable_opacity_micromaps: + self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_DISABLE_OPACITY_MICROMAPS + self._gas_buffer = None self._instances = None + self._relocate_deps = [] build_inputs = ensure_iterable(build_inputs) self.build(build_inputs, stream=stream) @@ -778,14 +916,23 @@ cdef class AccelerationStructure(OptixContextObject): cdef void build(self, build_inputs, stream=None): # build a single vector from all the build inputs cdef size_t inputs_size = len(build_inputs) - cdef vector[OptixBuildInput] inputs #= vector[OptixBuildInput](inputs_size) - - self._init_build_inputs(build_inputs, inputs) + cdef vector[OptixBuildInput] c_inputs #= vector[OptixBuildInput](inputs_size) + cdef size_t i - if isinstance(build_inputs[0], BuildInputInstanceArray): - if inputs_size > 1: - raise ValueError("Only a single build input allowed for instance builds") - self._instances = (build_inputs[0]).instances # keep the instances so the buffers do not get deleted + self._init_build_inputs(build_inputs, c_inputs) + + for build_input in build_inputs: + if isinstance(build_input, BuildInputInstanceArray): + relocation_dep = RelocationInstanceDependency((build_input).instances) + if inputs_size > 1: + raise ValueError("Only a single build input allowed for instance builds") + elif isinstance(build_input, BuildInputTriangleArray): + micromap = build_input.micromap + micromap_array = micromap.micromap_array if micromap is not None else None + relocation_dep = RelocationTriangleDependency(build_input.num_sbt_records, micromap=micromap_array) + else: + relocation_dep = RelocationDependency(build_input.type) + self._relocate_deps.append(relocation_dep) cdef vector[OptixAccelBuildOptions] accel_options# = vector[OptixAccelBuildOptions](inputs_size) self._init_accel_options(inputs_size, self._build_flags, OPTIX_BUILD_OPERATION_BUILD, accel_options) @@ -798,7 +945,7 @@ cdef class AccelerationStructure(OptixContextObject): optix_check_return(optixAccelComputeMemoryUsage(self.context.c_context, accel_options.data(), - inputs.data(), + c_inputs.data(), inputs_size, &self._buffer_sizes)) @@ -827,7 +974,7 @@ cdef class AccelerationStructure(OptixContextObject): optix_check_return(optixAccelBuild(self.context.c_context, c_stream, accel_options.data(), - inputs.data(), + c_inputs.data(), inputs_size, tmp_gas_buffer_ptr, self._buffer_sizes.tempSizeInBytes, @@ -871,6 +1018,8 @@ cdef class AccelerationStructure(OptixContextObject): cdef size_t inputs_size = len(build_inputs) + if inputs_size != len(self._relocate_deps): + raise ValueError("Number of build inputs given to update() must be the same as the one used to build this GAS") cdef vector[OptixBuildInput] inputs #= vector[OptixBuildInput](inputs_size) self._init_build_inputs(build_inputs, inputs) @@ -903,30 +1052,50 @@ cdef class AccelerationStructure(OptixContextObject): NULL, 0)) - def __deepcopy__(self, memodict={}): + def __deepcopy__(self, memo): + """ + Perform a deep copy of the AccelerationStructure by using the standard python copy.deepcopy function. + """ + # relocate on the same device to perform a regular deep copy + result = self.relocate(device=None) + memo[id(self)] = result + return result + + + def relocate(self, + device: typ.Optional[DeviceContext] = None, + stream: typ.Optional[cp.cuda.Stream] = None): """ - Perform a deep copy of the AccelerationStructure by using the standard python copy.deepcopy function. This method - will also handle any necessary relocation tasks required by optiX on a copy. + Relocate this acceleration structure into another copy. Usually this is equivalent to a deep copy. + Additionally, the accleration structure can be copied to a different defice by specifying the device + parameter with a different DeviceContext. Parameters ---------- - memodict - + device: + An optional DeviceContext. The resulting copy of the acceleration structure will be copied + to this device. If None, the acceleration structure's current device is used. + stream: + The stream to use for the relocation. If None, the default stream (0) is used. Returns ------- copy: AccelerationStructure - The copy of the AccelerationStructure + The copy of the AccelerationStructure on the new device """ from copy import deepcopy # relocate the optix structure - cdef OptixAccelRelocationInfo gas_info - memset(&gas_info, 0, sizeof(OptixAccelRelocationInfo)) # init struct to 0 + cdef OptixRelocationInfo gas_info + memset(&gas_info, 0, sizeof(OptixRelocationInfo)) # init struct to 0 optix_check_return(optixAccelGetRelocationInfo(self.context.c_context, self._handle, &gas_info)) + if device is None: + device = self.context + + # check if the new device is compatible with this acceleration structure cdef int compatible = 0 - optix_check_return(optixAccelCheckRelocationCompatibility(self.context.c_context, + optix_check_return(optixCheckRelocationCompatibility((device).c_context, &gas_info, &compatible)) if compatible != 1: @@ -936,47 +1105,53 @@ cdef class AccelerationStructure(OptixContextObject): cls = self.__class__ cdef AccelerationStructure result = cls.__new__(cls) - memodict[id(self)] = result - - result.context = self.context + result.context = device result._build_flags = self._build_flags result._buffer_sizes = self._buffer_sizes - result._instances = deepcopy(self._instances) # copy all instances and their AccelerationStructures first - + + #if self._instances is not None: + # result._instances = [inst.relocate(device=device, stream=stream) for inst in self._instances] # copy all instances and their AccelerationStructures first + buffer_size = round_up(self._buffer_sizes.outputSizeInBytes, 8) + 8 result._gas_buffer = cp.cuda.alloc(buffer_size) cp.cuda.runtime.memcpy(result._gas_buffer.ptr, self._gas_buffer.ptr, buffer_size, cp.cuda.runtime.memcpyDeviceToDevice) - cdef vector[OptixTraversableHandle] c_instance_handles - cdef ssize_t c_instance_handles_size = 0 - cdef object d_instances - cdef size_t i - cdef CUdeviceptr d_instances_ptr = 0 - - if result._instances is not None: - # prepare the new instance handles for relocation - c_instance_handles.resize(len(result._instances)) - c_instance_handles_size = sizeof(OptixTraversableHandle) * c_instance_handles.size() - d_instances = cp.cuda.alloc(c_instance_handles_size) - for i in range(c_instance_handles.size()): - c_instance_handles[i] = result._instances[i].traversable.handle - d_instances_ptr = d_instances.ptr - cp.cuda.runtime.memcpy(d_instances_ptr, c_instance_handles.data(), c_instance_handles_size, cp.cuda.runtime.memcpyHostToDevice) + #cdef vector[OptixTraversableHandle] c_instance_handles + #cdef ssize_t c_instance_handles_size = 0 + #cdef object d_instances + #cdef size_t i + #cdef CUdeviceptr d_instances_ptr = 0 + cdef vector[OptixRelocateInput] c_relocate_inputs + #cdef size_t num_relocate_inputs + c_relocate_inputs.resize(len(self._relocate_deps)) + + # prepare to relocate the dependencies (micromaps and instances) + result._relocate_deps = [] + for i, dep in enumerate(self._relocate_deps): + relocated_dep = dep.relocate(device, stream) + result._relocate_deps.append(relocated_dep) + relocated_dep.fill_relocation_input(c_relocate_inputs[i]) result._handle = 0 - cdef uintptr_t c_stream = 0 + + if stream is not None: + c_stream = stream.ptr + cdef OptixTraversableHandle c_handle = 0 optix_check_return(optixAccelRelocate(result.context.c_context, c_stream, &gas_info, - d_instances_ptr, - c_instance_handles_size, + &c_relocate_inputs[0], + c_relocate_inputs.size(), result._gas_buffer, self._buffer_sizes.outputSizeInBytes, &c_handle)) result._handle = c_handle + for dep in result._relocate_deps: + dep.finalize_relocation_input() + return result @property diff --git a/optix/context.pyx b/optix/context.pyx index 0049179..333baee 100644 --- a/optix/context.pyx +++ b/optix/context.pyx @@ -4,9 +4,9 @@ from .common cimport optix_check_return, optix_init from libc.stdint cimport uintptr_t, int32_t import cupy as cp -optix_init() +__all__ = ['optix_version', 'DeviceContext'] -OPTIX_VERSION = _OPTIX_VERSION +optix_init() def optix_version(): return _OPTIX_VERSION_MAJOR, _OPTIX_VERSION_MINOR, _OPTIX_VERSION_MICRO diff --git a/optix/denoiser.pxd b/optix/denoiser.pxd index 7e4082f..9c08ae7 100644 --- a/optix/denoiser.pxd +++ b/optix/denoiser.pxd @@ -6,68 +6,39 @@ from libc.stdint cimport uintptr_t from libcpp cimport bool cdef extern from "optix_includes.h" nogil: - IF _OPTIX_VERSION > 70400: - cdef enum OptixDenoiserModelKind: - OPTIX_DENOISER_MODEL_KIND_LDR - OPTIX_DENOISER_MODEL_KIND_HDR - OPTIX_DENOISER_MODEL_KIND_AOV - OPTIX_DENOISER_MODEL_KIND_TEMPORAL - OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV - OPTIX_DENOISER_MODEL_KIND_UPSCALE2X - OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X - - ELIF _OPTIX_VERSION > 70300: - cdef enum OptixDenoiserModelKind: - OPTIX_DENOISER_MODEL_KIND_LDR - OPTIX_DENOISER_MODEL_KIND_HDR - OPTIX_DENOISER_MODEL_KIND_AOV - OPTIX_DENOISER_MODEL_KIND_TEMPORAL - OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV - ELSE: - cdef enum OptixDenoiserModelKind: - OPTIX_DENOISER_MODEL_KIND_LDR - OPTIX_DENOISER_MODEL_KIND_HDR - OPTIX_DENOISER_MODEL_KIND_AOV - OPTIX_DENOISER_MODEL_KIND_TEMPORAL - + cdef enum OptixDenoiserModelKind: + OPTIX_DENOISER_MODEL_KIND_LDR + OPTIX_DENOISER_MODEL_KIND_HDR + OPTIX_DENOISER_MODEL_KIND_AOV + OPTIX_DENOISER_MODEL_KIND_TEMPORAL + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV + OPTIX_DENOISER_MODEL_KIND_UPSCALE2X + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X cdef struct OptixDenoiserOptions: unsigned int guideAlbedo unsigned int guideNormal - IF _OPTIX_VERSION > 70400: - cdef struct OptixDenoiserSizes: - size_t stateSizeInBytes - size_t withOverlapScratchSizeInBytes - size_t withoutOverlapScratchSizeInBytes - unsigned int overlapWindowSizeInPixels - size_t computeAverageColorSizeInBytes - size_t computeIntensitySizeInBytes - size_t internalGuideLayerPixelSizeInBytes - - cdef enum OptixDenoiserAlphaMode: - OPTIX_DENOISER_ALPHA_MODE_COPY, - OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV, - OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS - - cdef struct OptixDenoiserParams: - OptixDenoiserAlphaMode denoiseAlpha - CUdeviceptr hdrIntensity - float blendFactor - CUdeviceptr hdrAverageColor - unsigned int temporalModeUsePreviousLayers - ELSE: - cdef struct OptixDenoiserSizes: - size_t stateSizeInBytes - size_t withOverlapScratchSizeInBytes - size_t withoutOverlapScratchSizeInBytes - unsigned int overlapWindowSizeInPixels - - cdef struct OptixDenoiserParams: - unsigned int denoiseAlpha - CUdeviceptr hdrIntensity - float blendFactor - CUdeviceptr hdrAverageColor + cdef struct OptixDenoiserSizes: + size_t stateSizeInBytes + size_t withOverlapScratchSizeInBytes + size_t withoutOverlapScratchSizeInBytes + unsigned int overlapWindowSizeInPixels + size_t computeAverageColorSizeInBytes + size_t computeIntensitySizeInBytes + size_t internalGuideLayerPixelSizeInBytes + + cdef enum OptixDenoiserAlphaMode: + OPTIX_DENOISER_ALPHA_MODE_COPY, + OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV, + OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS + + cdef struct OptixDenoiserParams: + OptixDenoiserAlphaMode denoiseAlpha + CUdeviceptr hdrIntensity + float blendFactor + CUdeviceptr hdrAverageColor + unsigned int temporalModeUsePreviousLayers cdef enum OptixPixelFormat: @@ -93,18 +64,12 @@ cdef extern from "optix_includes.h" nogil: OptixImage2D previousOutput OptixImage2D output - IF _OPTIX_VERSION > 70400: - cdef struct OptixDenoiserGuideLayer: - OptixImage2D albedo - OptixImage2D normal - OptixImage2D flow - OptixImage2D previousOutputInternalGuideLayer - OptixImage2D outputInternalGuideLayer - ELSE: - cdef struct OptixDenoiserGuideLayer: - OptixImage2D albedo - OptixImage2D normal - OptixImage2D flow + cdef struct OptixDenoiserGuideLayer: + OptixImage2D albedo + OptixImage2D normal + OptixImage2D flow + OptixImage2D previousOutputInternalGuideLayer + OptixImage2D outputInternalGuideLayer ctypedef struct OptixDenoiser: pass diff --git a/optix/denoiser.pyx b/optix/denoiser.pyx index b3aa5c6..529dca5 100644 --- a/optix/denoiser.pyx +++ b/optix/denoiser.pyx @@ -8,20 +8,20 @@ import numpy as np from enum import IntEnum from libcpp.vector cimport vector from .common import ensure_iterable +from typing import Optional optix_init() __all__ = ['DenoiserModelKind', - 'Denoiser' + 'Denoiser', + 'DenoiserAlphaMode' ] -IF _OPTIX_VERSION > 70400: - class DenoiserAlphaMode(enum.IntEnum): - COPY = OPTIX_DENOISER_ALPHA_MODE_COPY - ALPHA_AS_AOV = OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV - FULL_DENOISE_PASS = OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS +class DenoiserAlphaMode(enum.IntEnum): + COPY = OPTIX_DENOISER_ALPHA_MODE_COPY + ALPHA_AS_AOV = OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV + FULL_DENOISE_PASS = OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS - __all__.append('DenoiserAlphaMode') class DenoiserModelKind(IntEnum): """ @@ -31,20 +31,14 @@ class DenoiserModelKind(IntEnum): HDR = OPTIX_DENOISER_MODEL_KIND_HDR AOV = OPTIX_DENOISER_MODEL_KIND_AOV TEMPORAL = OPTIX_DENOISER_MODEL_KIND_TEMPORAL - - IF _OPTIX_VERSION > 70300: - TEMPORAL_AOV = OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV - IF _OPTIX_VERSION > 70400: - UPSCALE2X = OPTIX_DENOISER_MODEL_KIND_UPSCALE2X - TEMPORAL_UPSCALE2X = OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X + TEMPORAL_AOV = OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV + UPSCALE2X = OPTIX_DENOISER_MODEL_KIND_UPSCALE2X + TEMPORAL_UPSCALE2X = OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X def temporal_mode(self): - IF _OPTIX_VERSION > 70400: - return self == self.TEMPORAL or self==self.TEMPORAL_AOV or self == self.TEMPORAL_UPSCALE2X - ELIF _OPTIX_VERSION > 70300: - return self == self.TEMPORAL or self == self.TEMPORAL_AOV - ELSE: - return self == self.TEMPORAL + return self == self.TEMPORAL or \ + self==self.TEMPORAL_AOV or \ + self == self.TEMPORAL_UPSCALE2X class PixelFormat(IntEnum): @@ -55,8 +49,6 @@ class PixelFormat(IntEnum): FLOAT3 = OPTIX_PIXEL_FORMAT_FLOAT3 FLOAT4 = OPTIX_PIXEL_FORMAT_FLOAT4 - - @classmethod def from_dtype_size(cls, dtype, size): try: @@ -205,12 +197,8 @@ cdef class Denoiser(OptixContextObject): self._state_size = return_sizes.stateSizeInBytes self._d_state = cp.cuda.alloc(return_sizes.stateSizeInBytes) - IF _OPTIX_VERSION > 70400: - self._intensity_scratch_size = return_sizes.computeIntensitySizeInBytes - self._average_color_scratch_size = return_sizes.computeAverageColorSizeInBytes - ELSE: - self._intensity_scratch_size = self._scratch_size - self._average_color_scratch_size = self._scratch_size + self._intensity_scratch_size = return_sizes.computeIntensitySizeInBytes + self._average_color_scratch_size = return_sizes.computeAverageColorSizeInBytes cdef uintptr_t c_stream = 0 @@ -227,8 +215,6 @@ cdef class Denoiser(OptixContextObject): self._d_scratch.ptr, self._scratch_size)) - - @classmethod def create_with_user_model(cls, DeviceContext context, unsigned char[::1] user_model not None): obj = cls(context, model_kind=None) @@ -245,7 +231,7 @@ cdef class Denoiser(OptixContextObject): normals=None, flow=None, outputs=None, - denoise_alpha=None, + denoise_alpha: DenoiserAlphaMode = DenoiserAlphaMode.COPY, blend_factor=0.0, stream=None, temporal_use_previous_layer=False): @@ -313,16 +299,8 @@ cdef class Denoiser(OptixContextObject): params.hdrAverageColor = self._d_avg_color.ptr if self._d_avg_color is not None else 0 params.blendFactor = blend_factor - IF _OPTIX_VERSION > 70400: - params.temporalModeUsePreviousLayers = 1 if temporal_use_previous_layer and temporal_mode else 0 - if denoise_alpha is None: - denoise_alpha = DenoiserAlphaMode.COPY - - assert isinstance(denoise_alpha, DenoiserAlphaMode), "Optix >7.5 changed this from a boolean variable into an enum" - params.denoiseAlpha = denoise_alpha.value - ELSE: - params.denoiseAlpha = 1 if denoise_alpha else 0 - + params.temporalModeUsePreviousLayers = 1 if temporal_use_previous_layer and temporal_mode else 0 + params.denoiseAlpha = denoise_alpha.value cdef uintptr_t c_stream = 0 diff --git a/optix/micromap.pxd b/optix/micromap.pxd new file mode 100644 index 0000000..157e485 --- /dev/null +++ b/optix/micromap.pxd @@ -0,0 +1,169 @@ +from libc.stdint cimport uint32_t +from libcpp.vector cimport vector +from libcpp.pair cimport pair +from .base cimport OptixObject +from .common cimport OptixResult, CUstream, CUdeviceptr +from .context cimport OptixDeviceContext, OptixContextObject + + +cdef extern from "optix_micromap.h" nogil: + cdef packed struct float2: + float x + float y + + void optixMicromapIndexToBaseBarycentrics(uint32_t microTriangleIndex, + uint32_t subdivisionLevel, + float2& baseBarycentrics0, + float2& baseBarycentrics1, + float2& baseBarycentrics2) + + +cdef extern from "optix.h" nogil: + cdef enum OptixOpacityMicromapArrayIndexingMode: + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE, + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR, + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED + + + cdef enum OptixOpacityMicromapFlags: + OPTIX_OPACITY_MICROMAP_FLAG_NONE, + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE, + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD + + + cdef enum OptixOpacityMicromapFormat: + OPTIX_OPACITY_MICROMAP_FORMAT_NONE, + OPTIX_OPACITY_MICROMAP_FORMAT_2_STATE # 0: Transparent, 1: Opaque + OPTIX_OPACITY_MICROMAP_FORMAT_4_STATE # 0: Transparent, 1: Opaque, 2: Unknown-Transparent, 3: Unknown-Opaque + + + cdef struct OptixOpacityMicromapHistogramEntry: + unsigned int count + unsigned int subdivisionLevel + OptixOpacityMicromapFormat format + + + cdef struct OptixOpacityMicromapUsageCount: + unsigned int count + unsigned int subdivisionLevel + OptixOpacityMicromapFormat format + + + cdef struct OptixOpacityMicromapDesc: + unsigned int byteOffset + unsigned short subdivisionLevel + unsigned short format + + + # get the defines for the micromap state as constant variables to access them from cython + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT" # = 0 + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_OPAQUE" # = 1 + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT" # = 2 + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE" # = 3 + + cdef const unsigned long long OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT "OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT" + + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT" # = 3 + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE" # = 3 + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT" # = 3 + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE" # = 3 + + + + cdef struct OptixOpacityMicromapArrayBuildInput: + OptixOpacityMicromapFlags flags + CUdeviceptr inputBuffer + CUdeviceptr perMicromapDescBuffer + unsigned int perMicromapDescStrideInBytes + unsigned int numMicromapHistogramEntries + const OptixOpacityMicromapHistogramEntry * micromapHistogramEntries + + + cdef struct OptixBuildInputOpacityMicromap: + OptixOpacityMicromapArrayIndexingMode indexingMode + CUdeviceptr opacityMicromapArray + CUdeviceptr indexBuffer + unsigned int indexSizeInBytes + unsigned int indexStrideInBytes + unsigned int indexOffset + unsigned int numMicromapUsageCounts + const OptixOpacityMicromapUsageCount * micromapUsageCounts + + + cdef struct OptixMicromapBufferSizes: + size_t outputSizeInBytes + size_t tempSizeInBytes + + + cdef struct OptixMicromapBuffers: + CUdeviceptr output + size_t outputSizeInBytes + CUdeviceptr temp + size_t tempSizeInBytes + + + cdef struct OptixRelocateInputOpacityMicromap: + CUdeviceptr opacityMicromapArray + + + cdef struct OptixRelocationInfo: + unsigned long long info[4] + + + OptixResult optixOpacityMicromapArrayBuild(OptixDeviceContext context, + CUstream stream, + const OptixOpacityMicromapArrayBuildInput * buildInput, + const OptixMicromapBuffers * buffers) + + + OptixResult optixOpacityMicromapArrayComputeMemoryUsage(OptixDeviceContext context, + const OptixOpacityMicromapArrayBuildInput * buildInput, + OptixMicromapBufferSizes * bufferSizes) + + + OptixResult optixOpacityMicromapArrayGetRelocationInfo(OptixDeviceContext context, + CUdeviceptr opacityMicromapArray, + OptixRelocationInfo * info) + + + OptixResult optixOpacityMicromapArrayRelocate(OptixDeviceContext context, + CUstream stream, + const OptixRelocationInfo * info, + CUdeviceptr targetOpacityMicromapArray, + size_t targetOpacityMicromapArraySizeInBytes) + + + OptixResult optixCheckRelocationCompatibility(OptixDeviceContext context, + const OptixRelocationInfo * info, + int * compatible + ) +# cdef extern from "" namespace "std" nogil: +# cdef cppclass std_hash "hash"[T]: +# function() except + +# bint operator()(const T&) const + + + +cdef class OpacityMicromapInput(OptixObject): + cdef object buffer + cdef OptixOpacityMicromapFormat c_format + cdef unsigned int c_subdivision_level + + +cdef class OpacityMicromapArray(OptixContextObject): + cdef object d_micromap_array_buffer + cdef OptixOpacityMicromapFlags _build_flags + cdef size_t _buffer_size + cdef unsigned int c_num_micromaps + cdef tuple _micromap_types + + cdef void build(self, inputs, stream=*) + + +cdef class BuildInputOpacityMicromap(OptixObject): + cdef OptixBuildInputOpacityMicromap build_input + cdef OpacityMicromapArray c_micromap_array + cdef object _d_index_buffer + cdef object _usage_counts + cdef vector[OptixOpacityMicromapUsageCount] c_usage_counts + diff --git a/optix/micromap.pyx b/optix/micromap.pyx new file mode 100644 index 0000000..fa237cd --- /dev/null +++ b/optix/micromap.pyx @@ -0,0 +1,541 @@ +# distutils: language = c++ + +import numpy as np +cimport numpy as np +np.import_array() + +import cupy as cp +from .common cimport optix_check_return, optix_init + +cimport cython +from cython.operator import dereference +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uintptr_t +from libcpp cimport bool +from libcpp.vector cimport vector +from libc.string cimport memset +from collections import defaultdict, namedtuple +from collections.abc import Mapping, Sequence +from enum import IntEnum, IntFlag +import typing as typ +from .common import ensure_iterable +from .context cimport DeviceContext + +optix_init() + +__all__ = ['micromap_indices_to_base_barycentrics', + 'OpacityMicromapFormat', + 'OpacityMicromapState', + 'OpacityMicromapInput', + 'OpacityMicromapArray', + 'OpacityMicromapArrayIndexingMode', + 'BuildInputOpacityMicromap'] + + +cdef bool valid_subdivision_level(uint8_t[:, :] opacity): + return (np.log2(opacity.shape[1]) / 2).is_integer() + +cdef bool is_baked(uint8_t[:, :] opacity): + return opacity[0, 0] > 3 + + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +def micromap_indices_to_base_barycentrics(uint32_t[:] indices, uint32_t subdivision_level = 0): + """ + Converts micromap triangle indices to three base-triangle barycentric coordinates of the micro triangle vertices. + The base-triangle is the triangle that the micromap is applied to. + + Parameters + ---------- + indices: Indices of the micro triangles within a micromap. + subdivision_level: Subdivision level of the micromap. + + Returns + ------- + base_barycentrics_0: Barycentric coordinates in the space of the base triangle of vertex 0 of the micro triangle. + base_barycentrics_1: Barycentric coordinates in the space of the base triangle of vertex 1 of the micro triangle. + base_barycentrics_2: Barycentric coordinates in the space of the base triangle of vertex 2 of the micro triangle. + """ + cdef Py_ssize_t num_indices = indices.shape[0] + + barycentrics_0 = np.empty((num_indices, 2), dtype=np.float32) + barycentrics_1 = np.empty((num_indices, 2), dtype=np.float32) + barycentrics_2 = np.empty((num_indices, 2), dtype=np.float32) + + cdef float[:, ::1] barycentrics_0_view = barycentrics_0 + cdef float[:, ::1] barycentrics_1_view = barycentrics_1 + cdef float[:, ::1] barycentrics_2_view = barycentrics_2 + + cdef unsigned int i + with nogil: + for i in range(num_indices): + optixMicromapIndexToBaseBarycentrics(indices[i], + subdivision_level, + dereference(&barycentrics_0_view[i, 0]), + dereference(&barycentrics_1_view[i, 0]), + dereference(&barycentrics_2_view[i, 0])) + + return barycentrics_0, barycentrics_1, barycentrics_2 + + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +@cython.cdivision(True) +def bake_opacity_micromap(uint8_t[:, :] opacity, format = None): + cdef Py_ssize_t num_tris = opacity.shape[0] + cdef Py_ssize_t num_micro_tris = opacity.shape[1] + + if not valid_subdivision_level(opacity): + raise ValueError(f"Shape of input ({opacity.shape[1]}) does " + f"not correspond to a valid subdivision level") + + cdef uint8_t bits_per_state + + if format is None: + if np.any(np.ravel(opacity) > 1): + bits_per_state = 2 + else: + bits_per_state = 1 + else: + bits_per_state = OpacityMicromapFormat(format).value + + # create the array to bake the opacities into + opacity_baked = np.zeros((opacity.shape[0], opacity.shape[1] // 16 * bits_per_state), dtype=np.uint16) + cdef unsigned int bake_stride = 16 // bits_per_state + + cdef uint16_t[:, :] opacity_baked_view = opacity_baked + cdef unsigned int baked_index + cdef unsigned int i, j + with nogil: + for i in range(num_tris): + for j in range(0, num_micro_tris): + baked_index = j // bake_stride + opacity_baked_view[i, baked_index] |= opacity[i, j] << ((j%bake_stride) * bits_per_state) + + return opacity_baked, OpacityMicromapFormat(bits_per_state) + + + + +class OpacityMicromapArrayIndexingMode(IntEnum): + NONE = OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE, + LINEAR = OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR, + INDEXED = OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED + + +class OpacityMicromapFormat(IntEnum): + NONE = OPTIX_OPACITY_MICROMAP_FORMAT_NONE, # invalid format + TWO_STATE = OPTIX_OPACITY_MICROMAP_FORMAT_2_STATE # 0: Transparent, 1: Opaque + FOUR_STATE = OPTIX_OPACITY_MICROMAP_FORMAT_4_STATE # 0: Transparent, 1: Opaque, 2: Unknown-Transparent, 3: Unknown-Opaque + + +class OpacityMicromapState(IntEnum): + """ + This enum wraps the OPTIX_OPACITY_MICROMAP_STATE_* defines from optix + """ + TRANSPARENT = OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT_DEFINE + OPAQUE = OPTIX_OPACITY_MICROMAP_STATE_OPAQUE_DEFINE + UNKNOWN_TRANSPARENT = OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT_DEFINE + UNKNOWN_OPAQUE = OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE_DEFINE + + def bits_per_state(self): + """ + Returns the number of bits (either 1 or 2) required to encode this micromap state. + """ + if self < 2: + return 1 + return 2 + + def format(self): + """ + Returns the OpacityMicromapFormat to encode this state. This is either + OpacityMicromapFormat.TWO_STATE or OpacityMicromapFormat.FOUR_STATE. + """ + if self < 2: + return OpacityMicromapFormat.TWO_STATE + return OpacityMicromapFormat.FOUR_STATE + + +class OpacityMicromapPredefinedIndex(IntEnum): + """ + This enum wraps the OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_* defines from optix. + It provides the indices required to mark a triangle as either fully opaque or transparent in a + OpacityMicromapBuildInput. + """ + FULLY_TRANSPARENT = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT_DEFINE + FULLY_OPAQUE = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE_DEFINE + FULLY_UNKNOWN_TRANSPARENT = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT_DEFINE + FULLY_UNKNOWN_OPAQUE = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE_DEFINE + + +OpacityMicromapType = namedtuple("OpacityMicromapType", ["format", "subdivision_level"]) + + +cdef class OpacityMicromapInput(OptixObject): + """ + This class is a simple wrapper around an uint8-numpy array that will convert convert it into + the format required by the optix opacity micromaps while keeping track of the bit encoding and subdivision + level. The class supports inputs in an unbaked format (uint8 array with the values in OpacityMicromapFormat) as + well as baked formats (values encoded in either 1 or 2 bits). + + Parameters + ---------- + opacity: The input array in either unbaked or baked format. + format: Optional format specifier. Required for baked format, optional for unbaked. Invalid formats are not checked + for unbaked inputs. + """ + def __init__(self, + opacity, + format: typ.Optional[OpacityMicromapFormat] = None): + buffer = np.atleast_2d(opacity) + + format = OpacityMicromapFormat(format) if format is not None else None + if is_baked(buffer): + if not format: + raise ValueError("Baked input requires a format specification") + shape_unbaked = (buffer.dtype.itemsize * 8 * buffer.shape[1]) / format.value + subdivision_level = (np.log2(shape_unbaked) / 2) + + if not subdivision_level.is_integer(): + ValueError(f"Shape of baked input ({opacity.shape[1]}) does " + f"not correspond to a valid subdivision level in given format ({format}).") + else: + subdivision_level = (np.log2(buffer.shape[1]) / 2) + buffer, fmt = bake_opacity_micromap(buffer, format) + + # elif format != fmt: + # raise ValueError(f"Attempt to bake the micromap input resulted in a different format than the given one. " + # f"{format} != {fmt}.") + + self.buffer = buffer + self.c_format = format.value + self.c_subdivision_level = subdivision_level + + @property + def format(self): + return OpacityMicromapFormat(self.c_format) + + @property + def subdivision_level(self): + return self.c_subdivision_level + + @property + def ntriangles(self): + return self.buffer.shape[0] + + @property + def nbytes(self): + return self.buffer.size * self.buffer.itemsize + + def _repr_details(self): + return f"ntriangles={self.ntriangles}, format={self.format.name}, subdivision_level={self.subdivision_level}" +# ctypedef pair[OptixOpacityMicromapFormat, int] histogram_entry +# +# cdef bint histogram_entry_hash(const histogram_entry& s) nogil: +# return std_hash[OptixOpacityMicromapFormat]()(s.first) ^ std_hash[int]()(s.second) + + + +cdef class OpacityMicromapArray(OptixContextObject): + """ + Class representing an array of opacity micromaps on the optix device. + This class wraps the internal GPU buffer containing the micromap data and serves to build the structure from + one or multiple OpactiyMircomap inputs + + Parameters + ---------- + context: + The device context to use. + inputs: + An iterable of OpacityMicromapInput or numpy ndarrays. All numpy arrays will be converted into + OpacityMicroMapInput automatically in this class. + flags: + A set of OpacityMicromapFlags to use for building this array. If None, the default OpacityMicromapFlags.NONE + is used. + stream: + Cuda stream to use for building this micromap array. If None the default stream is used. + """ + def __init__(self, + context: DeviceContext, + inputs: typ.Iterable[typ.Union[np.ndarray, OpacityMicromapInput]], + prefer_fast_build: bool = False, + stream: typ.Optional[cp.cuda.Stream] = None): + super().__init__(context) + self.d_micromap_array_buffer = None + self._micromap_types = None + self._build_flags = OPTIX_OPACITY_MICROMAP_FLAG_NONE + if prefer_fast_build: + self._build_flags = OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD + else: + self._build_flags = OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE + self.build(inputs, stream=stream) + + cdef void build(self, inputs, stream=None): + # convert all inputs into the correct format first + inputs = [OpacityMicromapInput(inp) if not isinstance(inp, OpacityMicromapInput) + else inp for inp in ensure_iterable(inputs)] + + cdef OptixOpacityMicromapArrayBuildInput build_input + build_input.flags = self._build_flags + + cdef size_t inputs_size_in_bytes = 0 + micromap_counts = defaultdict(lambda: 0) + micromap_types = [] + + self.c_num_micromaps = 0 + # build the histogram from the input specifications and convert it into a cpp vector to pass it to the build input + for i in inputs: + omm_type = OpacityMicromapType(i.format, i.subdivision_level) + micromap_counts[omm_type] += i.ntriangles + micromap_types.append(omm_type) + inputs_size_in_bytes += i.nbytes + self.c_num_micromaps += i.ntriangles + self._micromap_types = tuple(micromap_types) + + cdef vector[OptixOpacityMicromapHistogramEntry] histogram_entries + histogram_entries.resize(len(micromap_counts)) + build_input.numMicromapHistogramEntries = histogram_entries.size() + + for i, (k, v) in enumerate(micromap_counts.items()): + histogram_entries[i].count = v + histogram_entries[i].format = k.format.value + histogram_entries[i].subdivisionLevel = k.subdivision_level + + build_input.micromapHistogramEntries = histogram_entries.data() + del micromap_counts + + # allocate a buffer to hold all input micromaps and put it's pointer in the build input + d_input_buffer = cp.cuda.alloc(inputs_size_in_bytes) + build_input.inputBuffer = d_input_buffer.ptr + + cdef unsigned int offset = 0 + cdef vector[OptixOpacityMicromapDesc] descs + cdef uint16_t[:, :] buffer_view + cdef unsigned int t + cdef unsigned int desc_i = 0; + + descs.resize(self.c_num_micromaps) + #TODO use the actual triangles in the input array here! + # copy all input data into the device buffer + for i, inp in enumerate(inputs): + buffer_view = (inp).buffer + num_bytes = inp.nbytes + cp.cuda.runtime.memcpy(d_input_buffer.ptr + offset, + &buffer_view[0,0], + num_bytes, + cp.cuda.runtime.memcpyHostToDevice) + for t in range(inp.ntriangles): + # fill the descriptor array at the same time with to information in input + descs[desc_i].byteOffset = offset + offset += buffer_view.shape[1] * sizeof(uint16_t) + descs[desc_i].subdivisionLevel = inp.subdivision_level + descs[desc_i].format = inp.format.value + desc_i += 1 + + # copy the descriptor array onto the device + cdef size_t desc_size_in_bytes = descs.size() * sizeof(OptixOpacityMicromapDesc) + + d_desc_buffer = cp.cuda.alloc(desc_size_in_bytes) + cp.cuda.runtime.memcpy(d_desc_buffer.ptr, descs.data(), desc_size_in_bytes, cp.cuda.runtime.memcpyHostToDevice) + + build_input.perMicromapDescBuffer = d_desc_buffer.ptr + build_input.perMicromapDescStrideInBytes = 0 + + cdef OptixMicromapBufferSizes build_sizes + + optix_check_return(optixOpacityMicromapArrayComputeMemoryUsage(self.context.c_context, + &build_input, + &build_sizes)) + # TODO: do we have to align this buffer? + self.d_micromap_array_buffer = cp.cuda.alloc(build_sizes.outputSizeInBytes) + self._buffer_size = build_sizes.outputSizeInBytes + + d_temp_buffer = cp.cuda.alloc(build_sizes.tempSizeInBytes) + + cdef OptixMicromapBuffers micromap_buffers + + micromap_buffers.tempSizeInBytes = build_sizes.tempSizeInBytes + micromap_buffers.temp = d_temp_buffer.ptr + + micromap_buffers.outputSizeInBytes = build_sizes.outputSizeInBytes + micromap_buffers.output = self.d_micromap_array_buffer.ptr + + cdef uintptr_t c_stream = 0 + + if stream is not None: + c_stream = stream.ptr + with nogil: + optix_check_return(optixOpacityMicromapArrayBuild(self.context.c_context, + c_stream, + &build_input, + µmap_buffers)) + # all temporary buffers will be freed automatically here + + @property + def types(self): + return self._micromap_types + + def __deepcopy__(self, memo): + """ + Perform a deep copy of the OpactiyMicromap by using the standard python copy.deepcopy function. + """ + # relocate on the same device to perform a regular deep copy + result = self.relocate(device=None) + memo[id(self)] = result + return result + + def _repr_details(self): + return f"size={self._buffer_size}, nmicromaps={self.c_num_micromaps}" + + def relocate(self, + device: typ.Optional[DeviceContext] = None, + stream: typ.Optional[cp.cuda.Stream] = None) -> OpacityMicromapArray: + """ + Relocate this opacity micromap array into another copy. Usually this is equivalent to a deep copy. + Additionally, the micromap array can be copied to a different device by specifying the device + parameter with a different DeviceContext. + + Parameters + ---------- + device: + An optional DeviceContext. The resulting copy of the micromap array will be copied + to this device. If None, the micromap array's current device is used. + stream: + The stream to use for the relocation. If None, the default stream (0) is used. + + Returns + ------- + copy: OpacityMicromapArray + The copy of the OpacityMicromapArray on the new device + """ + # first determine the relocation info for this micromap array + cdef OptixRelocationInfo micromap_info + memset(µmap_info, 0, sizeof(OptixRelocationInfo)) # init struct to 0 + + optix_check_return(optixOpacityMicromapArrayGetRelocationInfo(self.context.c_context, + self.d_micromap_array_buffer, µmap_info)) + + if device is None: + device = self.context + + # check if the new device is compatible with this micromap array + cdef int compatible = 0 + optix_check_return(optixCheckRelocationCompatibility((device).c_context, + µmap_info, + &compatible)) + + if compatible != 1: + raise RuntimeError("Device is not compatible for relocation of opacity micromap array") + + # do the relocation + cls = self.__class__ + cdef OpacityMicromapArray result = cls.__new__(cls) + + result.context = device + result._build_flags = self._build_flags + result._buffer_size = self._buffer_size + # TODO: align this? + result._gas_buffer = cp.cuda.alloc(result._buffer_size) + cp.cuda.runtime.memcpy(result.d_micromap_array_buffer.ptr, + self.d_micromap_array_buffer.ptr, + result._buffer_size, + cp.cuda.runtime.memcpyDeviceToDevice) + + cdef uintptr_t c_stream = 0 + if stream is not None: + c_stream = stream.ptr + + optix_check_return(optixOpacityMicromapArrayRelocate(result.context.c_context, + c_stream, + µmap_info, + result.d_micromap_array_buffer.ptr, + result._buffer_size)) + + return result + + +cdef class BuildInputOpacityMicromap(OptixObject): + """ + Build input for an array of micromaps. Inputs of this type can optionally be passed to a + BuildInputTriangleArray to use micromaps for it's triangles. Additionally an array of the usage_counts + for the OMMs in the Array needs to be passed as a list. + If the indexing mode is specified as INDEXED, an additional index buffer containing an index into the omm array or one of + the values in OpacityMicromapPredefinedIndex is required. + + Parameters + ---------- + omm_array: + The OpacityMicromapArray to use by the triangles. + usage_counts: + The number of times each omm in the OpacityMicromapArray is used. + indexing_mode: + The indexing mode the omms should use. Must be one of the values in OpacityMicromapArrayIndexingMode. + By default NONE is used, disabling the use of OMMs + index_buffer: + If the indexing_mode is INDEXED, this additional index buffer, specifiing an omm to use or the default value + for each triangle in the geometry is required. + """ + def __init__(self, + OpacityMicromapArray omm_array, + usage_counts: Sequence[int], + indexing_mode: OpacityMicromapArrayIndexingMode = OpacityMicromapArrayIndexingMode.NONE, + index_buffer = None): + indexing_mode = OpacityMicromapArrayIndexingMode(indexing_mode) + + if indexing_mode == OpacityMicromapArrayIndexingMode.INDEXED: + if index_buffer is None: + raise ValueError("index_buffer is required for indexing_mode=INDEXED.") + if not any(np.issubdtype(index_buffer.dtype, dt) for dt in (np.uint16, np.uint32)): + raise ValueError("index_buffer must be of dtype np.uint16 or np.uint32") + self._d_index_buffer = cp.asarray(index_buffer).ravel() + # enable the index buffer in the build_input struct + self.build_input.indexBuffer = self._d_index_buffer.data.ptr + self.build_input.indexOffset = 0 + self.build_input.indexSizeInBytes = self._d_index_buffer.itemsize + self.build_input.indexStrideInBytes = self._d_index_buffer.strides[0] + else: + self.build_input.indexBuffer = 0 + self.build_input.indexOffset = 0 + self.build_input.indexSizeInBytes = 0 + self.build_input.indexStrideInBytes = 0 + + self.build_input.indexingMode = indexing_mode.value + self.c_micromap_array = omm_array + self.build_input.opacityMicromapArray = self.c_micromap_array.d_micromap_array_buffer.ptr + + # fill the usage counts vector from the specified usage array + # TODO: is there a way to determine the usage count automatically? + micromap_types = self.c_micromap_array.types + if len(usage_counts) != len(micromap_types): + raise IndexError(f"Number of entries in usage_count must be equal to the number of omms in micromap_array. " + f"Expected {len(micromap_types)}), got ({len(usage_counts)})") + usage_count_hist = defaultdict(lambda: 0) + + for type, usage_count in zip(micromap_types, usage_counts): + usage_count_hist[type] += usage_count + + self._usage_counts = usage_count_hist + self.c_usage_counts.resize(len(usage_counts)) + for i, (k, v) in enumerate(usage_count_hist.items()): + self.c_usage_counts[i].count = v + self.c_usage_counts[i].format = k.format.value + self.c_usage_counts[i].subdivisionLevel = k.subdivision_level + + self.build_input.micromapUsageCounts = self.c_usage_counts.data() + self.build_input.numMicromapUsageCounts = self.c_usage_counts.size() + + @property + def usage_counts(self): + return self._usage_counts + + @property + def types(self): + return self.c_micromap_array.types + + @property + def micromap_array(self): + return self.c_micromap_array + + @property + def index_buffer(self): + return self._d_index_buffer \ No newline at end of file diff --git a/optix/module.pxd b/optix/module.pxd index 94bb927..e5ba795 100644 --- a/optix/module.pxd +++ b/optix/module.pxd @@ -28,60 +28,48 @@ cdef extern from "optix_includes.h" nogil: const char* annotation - IF _OPTIX_VERSION > 70300: # switch to new version - cdef size_t OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT - - cdef enum OptixPayloadSemantics: - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_CH_NONE, - OPTIX_PAYLOAD_SEMANTICS_CH_READ, - OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, - OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_MS_NONE, - OPTIX_PAYLOAD_SEMANTICS_MS_READ, - OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, - OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_AH_NONE, - OPTIX_PAYLOAD_SEMANTICS_AH_READ, - OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, - OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_IS_NONE, - OPTIX_PAYLOAD_SEMANTICS_IS_READ, - OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, - OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE, - - cdef struct OptixPayloadType: - unsigned int numPayloadValues - const unsigned int* payloadSemantics - - cdef struct OptixModuleCompileOptions: - int maxRegisterCount - OptixCompileOptimizationLevel optLevel - OptixCompileDebugLevel debugLevel - const OptixModuleCompileBoundValueEntry* boundValues - unsigned int numBoundValues - unsigned int numPayloadTypes - OptixPayloadType* payloadTypes - - cdef struct OptixBuiltinISOptions: - OptixPrimitiveType builtinISModuleType - int usesMotionBlur - unsigned int buildFlags - unsigned int curveEndcapFlags - ELSE: - cdef struct OptixModuleCompileOptions: - int maxRegisterCount - OptixCompileOptimizationLevel optLevel - OptixCompileDebugLevel debugLevel - const OptixModuleCompileBoundValueEntry* boundValues - unsigned int numBoundValues - - cdef struct OptixBuiltinISOptions: - OptixPrimitiveType builtinISModuleType - int usesMotionBlur + cdef size_t OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT + + cdef enum OptixPayloadSemantics: + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_CH_NONE, + OPTIX_PAYLOAD_SEMANTICS_CH_READ, + OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, + OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_MS_NONE, + OPTIX_PAYLOAD_SEMANTICS_MS_READ, + OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, + OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_AH_NONE, + OPTIX_PAYLOAD_SEMANTICS_AH_READ, + OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, + OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_IS_NONE, + OPTIX_PAYLOAD_SEMANTICS_IS_READ, + OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, + OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE, + + cdef struct OptixPayloadType: + unsigned int numPayloadValues + const unsigned int* payloadSemantics + + cdef struct OptixModuleCompileOptions: + int maxRegisterCount + OptixCompileOptimizationLevel optLevel + OptixCompileDebugLevel debugLevel + const OptixModuleCompileBoundValueEntry* boundValues + unsigned int numBoundValues + unsigned int numPayloadTypes + OptixPayloadType* payloadTypes + + cdef struct OptixBuiltinISOptions: + OptixPrimitiveType builtinISModuleType + int usesMotionBlur + unsigned int buildFlags + unsigned int curveEndcapFlags OptixResult optixModuleCreateFromPTX(OptixDeviceContext context, @@ -104,52 +92,50 @@ cdef extern from "optix_includes.h" nogil: OptixModule *builtinModule) - IF _OPTIX_VERSION > 70300: # switch to new version - ctypedef struct OptixTask: - pass + ctypedef struct OptixTask: + pass - cdef enum OptixModuleCompileState: - OPTIX_MODULE_COMPILE_STATE_NOT_STARTED - OPTIX_MODULE_COMPILE_STATE_STARTED - OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE - OPTIX_MODULE_COMPILE_STATE_FAILED - OPTIX_MODULE_COMPILE_STATE_COMPLETED + cdef enum OptixModuleCompileState: + OPTIX_MODULE_COMPILE_STATE_NOT_STARTED + OPTIX_MODULE_COMPILE_STATE_STARTED + OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE + OPTIX_MODULE_COMPILE_STATE_FAILED + OPTIX_MODULE_COMPILE_STATE_COMPLETED - cdef OptixResult optixModuleGetCompilationState(OptixModule module, - OptixModuleCompileState * state) + cdef OptixResult optixModuleGetCompilationState(OptixModule module, + OptixModuleCompileState * state) - cdef OptixResult optixModuleCreateFromPTXWithTasks(OptixDeviceContext context, - const OptixModuleCompileOptions * moduleCompileOptions, - const OptixPipelineCompileOptions * pipelineCompileOptions, - const char * PTX, - size_t PTXsize, - char * logString, - size_t * logStringSize, - OptixModule * module, - OptixTask * firstTask) + cdef OptixResult optixModuleCreateFromPTXWithTasks(OptixDeviceContext context, + const OptixModuleCompileOptions * moduleCompileOptions, + const OptixPipelineCompileOptions * pipelineCompileOptions, + const char * PTX, + size_t PTXsize, + char * logString, + size_t * logStringSize, + OptixModule * module, + OptixTask * firstTask) + + cdef OptixResult optixTaskExecute(OptixTask task, + OptixTask * additionalTasks, + unsigned int maxNumAdditionalTasks, + unsigned int *numAdditionalTasksCreated) - cdef OptixResult optixTaskExecute(OptixTask task, - OptixTask * additionalTasks, - unsigned int maxNumAdditionalTasks, - unsigned int *numAdditionalTasksCreated) cdef class BuiltinISOptions(OptixObject): cdef OptixBuiltinISOptions options + cdef class Module(OptixContextObject): cdef OptixModule module cdef list _compile_flags -IF _OPTIX_VERSION > 70300: # switch to new version - cdef class ModuleCompileOptions(OptixObject): - cdef OptixModuleCompileOptions compile_options - cdef vector[OptixPayloadType] payload_types - cdef vector[vector[unsigned int]] payload_values # WTF! - - cdef class Task(OptixObject): - cdef OptixTask task - cdef Module module -ELSE: - cdef class ModuleCompileOptions(OptixObject): - cdef OptixModuleCompileOptions compile_options \ No newline at end of file + +cdef class ModuleCompileOptions(OptixObject): + cdef OptixModuleCompileOptions compile_options + cdef vector[OptixPayloadType] payload_types + cdef vector[vector[unsigned int]] payload_values # WTF! + +cdef class Task(OptixObject): + cdef OptixTask task + cdef Module module \ No newline at end of file diff --git a/optix/module.pyx b/optix/module.pyx index dbdc47c..583ff40 100644 --- a/optix/module.pyx +++ b/optix/module.pyx @@ -15,6 +15,9 @@ from libcpp.vector cimport vector optix_init() +__all__ = ['Module', 'ModuleCompileOptions', 'CompileOptimizationLevel', 'CompileDebugLevel', + 'PayloadSemantics', 'Task'] + class CompileOptimizationLevel(IntEnum): """ Wraps the OptixCompileOptimizationLevel enum @@ -25,93 +28,92 @@ class CompileOptimizationLevel(IntEnum): LEVEL_2 = OPTIX_COMPILE_OPTIMIZATION_LEVEL_2, LEVEL_3 = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3, -IF _OPTIX_VERSION > 70300: - class PayloadSemantics(IntFlag): - """ - Wraps the PayloadSemantics enum. - """ +class PayloadSemantics(IntFlag): + """ + Wraps the PayloadSemantics enum. + """ + + DEFAULT = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE # allow everything as default + TRACE_CALLER_NONE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, + TRACE_CALLER_READ = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, + TRACE_CALLER_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, + TRACE_CALLER_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, + CH_NONE = OPTIX_PAYLOAD_SEMANTICS_CH_NONE, + CH_READ = OPTIX_PAYLOAD_SEMANTICS_CH_READ, + CH_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, + CH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, + MS_NONE = OPTIX_PAYLOAD_SEMANTICS_MS_NONE, + MS_READ = OPTIX_PAYLOAD_SEMANTICS_MS_READ, + MS_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, + MS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, + AH_NONE = OPTIX_PAYLOAD_SEMANTICS_AH_NONE, + AH_READ = OPTIX_PAYLOAD_SEMANTICS_AH_READ, + AH_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, + AH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, + IS_NONE = OPTIX_PAYLOAD_SEMANTICS_IS_NONE, + IS_READ = OPTIX_PAYLOAD_SEMANTICS_IS_READ, + IS_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, + IS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE + +class ModuleCompileState(IntFlag): + NOT_STARTED = OPTIX_MODULE_COMPILE_STATE_NOT_STARTED, + STARTED = OPTIX_MODULE_COMPILE_STATE_STARTED, + IMPENDING_FAILURE = OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, + FAILED = OPTIX_MODULE_COMPILE_STATE_FAILED, + COMPLETED = OPTIX_MODULE_COMPILE_STATE_COMPLETED, + + +cdef class Task(OptixObject): + """ + Class to represent a parallel Task to compile an OptiX module. + A Task can be executed in parallel by e.g. a thread pool to handle lots of module compilations concurrently. + It is only valid as long as the corresponding module exists, therefore in this wrapper a reference to the module + if stored. - DEFAULT = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE # allow everything as default - TRACE_CALLER_NONE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, - TRACE_CALLER_READ = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, - TRACE_CALLER_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, - TRACE_CALLER_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, - CH_NONE = OPTIX_PAYLOAD_SEMANTICS_CH_NONE, - CH_READ = OPTIX_PAYLOAD_SEMANTICS_CH_READ, - CH_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, - CH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, - MS_NONE = OPTIX_PAYLOAD_SEMANTICS_MS_NONE, - MS_READ = OPTIX_PAYLOAD_SEMANTICS_MS_READ, - MS_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, - MS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, - AH_NONE = OPTIX_PAYLOAD_SEMANTICS_AH_NONE, - AH_READ = OPTIX_PAYLOAD_SEMANTICS_AH_READ, - AH_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, - AH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, - IS_NONE = OPTIX_PAYLOAD_SEMANTICS_IS_NONE, - IS_READ = OPTIX_PAYLOAD_SEMANTICS_IS_READ, - IS_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, - IS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE - - class ModuleCompileState(IntFlag): - NOT_STARTED = OPTIX_MODULE_COMPILE_STATE_NOT_STARTED, - STARTED = OPTIX_MODULE_COMPILE_STATE_STARTED, - IMPENDING_FAILURE = OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, - FAILED = OPTIX_MODULE_COMPILE_STATE_FAILED, - COMPLETED = OPTIX_MODULE_COMPILE_STATE_COMPLETED, - - - cdef class Task(OptixObject): + Note, that a Task is not supposed to be created by the user directly, but provided by the create_as_task method + of the Module class. + + Parameters + ---------- + module: Module + The module this Task belongs to. + """ + def __init__(self, Module module): + self.module = module + self.task = NULL + + def execute(self, max_additional_tasks=2): """ - Class to represent a parallel Task to compile an OptiX module. - A Task can be executed in parallel by e.g. a thread pool to handle lots of module compilations concurrently. - It is only valid as long as the corresponding module exists, therefore in this wrapper a reference to the module - if stored. + Execute the Task. If more parallel work is found, it will be returned as a new list of Task objects. + The list has a maximum size of max_additional_tasks. - Note, that a Task is not supposed to be created by the user directly, but provided by the create_as_task method - of the Module class. + Node, that each Task can only be executed by a single thread. Parameters ---------- - module: Module - The module this Task belongs to. + max_additional_tasks: int + The maximum number of new Tasks to create from this one + + Returns + ------- + tasks: List[Task] + The newly created tasks if any """ - def __init__(self, Module module): - self.module = module - self.task = NULL - - def execute(self, max_additional_tasks=2): - """ - Execute the Task. If more parallel work is found, it will be returned as a new list of Task objects. - The list has a maximum size of max_additional_tasks. - - Node, that each Task can only be executed by a single thread. - - Parameters - ---------- - max_additional_tasks: int - The maximum number of new Tasks to create from this one - - Returns - ------- - tasks: List[Task] - The newly created tasks if any - """ - cdef vector[OptixTask] additional_tasks - cdef unsigned int i - cdef unsigned int additional_tasks_created = 0 - cdef unsigned int max_num_additional_tasks = max_additional_tasks - - with nogil: - additional_tasks.resize(max_num_additional_tasks) - optix_check_return(optixTaskExecute(self.task, additional_tasks.data(), max_num_additional_tasks, &additional_tasks_created)) - - cdef list tasks = [] - for i in range(additional_tasks_created): - t = Task(self.module) - t.task = additional_tasks[i] - tasks.append(t) - return tasks + cdef vector[OptixTask] additional_tasks + cdef unsigned int i + cdef unsigned int additional_tasks_created = 0 + cdef unsigned int max_num_additional_tasks = max_additional_tasks + + with nogil: + additional_tasks.resize(max_num_additional_tasks) + optix_check_return(optixTaskExecute(self.task, additional_tasks.data(), max_num_additional_tasks, &additional_tasks_created)) + + cdef list tasks = [] + for i in range(additional_tasks_created): + t = Task(self.module) + t.task = additional_tasks[i] + tasks.append(t) + return tasks cdef class ModuleCompileOptions(OptixObject): @@ -133,25 +135,22 @@ cdef class ModuleCompileOptions(OptixObject): self.compile_options.numBoundValues = 0 self.compile_options.boundValues = NULL # currently not supported - IF _OPTIX_VERSION > 70300: - if payload_types is None: - self.compile_options.numPayloadTypes = 0 - self.compile_options.payloadTypes = NULL - else: - # set the payload types for these compile options (this is horrible, i know ;)) - payload_types = [ensure_iterable(pt) for pt in ensure_iterable(payload_types)] # list of lists - self.payload_types.resize(len(payload_types)) # the number of different payload types - self.payload_values.resize(self.payload_types.size()) # a vector of semantics for each payload type - self.compile_options.numPayloadTypes = self.payload_types.size() - for i, payload_values in enumerate(payload_types): - self.payload_types[i].numPayloadValues = len(payload_values) - self.payload_values[i].resize(self.payload_types[i].numPayloadValues) - for j, payload_semantics in enumerate(payload_values): - self.payload_values[i][j] = payload_semantics.value - self.payload_types[i].payloadSemantics = self.payload_values[i].data() - self.compile_options.payloadTypes = self.payload_types.data() - - + if payload_types is None: + self.compile_options.numPayloadTypes = 0 + self.compile_options.payloadTypes = NULL + else: + # set the payload types for these compile options (this is horrible, i know ;)) + payload_types = [ensure_iterable(pt) for pt in ensure_iterable(payload_types)] # list of lists + self.payload_types.resize(len(payload_types)) # the number of different payload types + self.payload_values.resize(self.payload_types.size()) # a vector of semantics for each payload type + self.compile_options.numPayloadTypes = self.payload_types.size() + for i, payload_values in enumerate(payload_types): + self.payload_types[i].numPayloadValues = len(payload_values) + self.payload_values[i].resize(self.payload_types[i].numPayloadValues) + for j, payload_semantics in enumerate(payload_values): + self.payload_values[i][j] = payload_semantics.value + self.payload_types[i].payloadSemantics = self.payload_values[i].data() + self.compile_options.payloadTypes = self.payload_types.data() @property def max_register_count(self): @@ -178,7 +177,7 @@ cdef class ModuleCompileOptions(OptixObject): self.compile_options.debugLevel = level.value -cdef tuple _nvrtc_compile_flags_default = ('-use_fast_math', '-lineinfo', '-default-device', '-std=c++11', '-rdc', 'true') +cdef tuple _nvrtc_compile_flags_default = ('-use_fast_math', '-default-device', '-std=c++11', '-rdc', 'true') def get_default_nvrtc_compile_flags(std=None, rdc=False): flags = list(_nvrtc_compile_flags_default[:-3]) @@ -199,19 +198,16 @@ cdef _is_ptx(src): cdef class BuiltinISOptions(OptixObject): def __init__(self, primitive_type, - build_flags=None, + build_flags, uses_motion_blur=False, curve_endcap_flags=None): self.options.builtinISModuleType = primitive_type.value self.options.usesMotionBlur = uses_motion_blur - IF _OPTIX_VERSION > 70300: - if build_flags is None: - raise ValueError("Parameter build_flags is required for OptiX versions >= 7.4.") - self.options.buildFlags = build_flags.value - if curve_endcap_flags is None: - curve_endcap_flags = CurveEndcapFlags.DEFAULT - self.options.curveEndcapFlags = curve_endcap_flags.value + self.options.buildFlags = build_flags.value + if curve_endcap_flags is None: + curve_endcap_flags = CurveEndcapFlags.DEFAULT + self.options.curveEndcapFlags = curve_endcap_flags.value cdef class Module(OptixContextObject): @@ -253,12 +249,10 @@ cdef class Module(OptixContextObject): if module_compile_options.debug_level != CompileDebugLevel.NONE: self._compile_flags.append("-G") + self._compile_flags.append("-lineinfo") if src is not None: ptx = self.compile_cuda_ptx(src, compile_flags, name=program_name) c_ptx = ptx - #IF _OPTIX_VERSION > 70300: - # self._check_payload_values(module_compile_options, pipeline_compile_options) - optix_check_return(optixModuleCreateFromPTX(self.context.c_context, &module_compile_options.compile_options, &pipeline_compile_options.compile_options, @@ -267,89 +261,86 @@ cdef class Module(OptixContextObject): NULL, NULL, &self.module)) - def __dealloc__(self): if self.module != 0: optix_check_return(optixModuleDestroy(self.module)) - IF _OPTIX_VERSION > 70300: - @property - def compile_state(self): - cdef OptixModuleCompileState state - with nogil: - optix_check_return(optixModuleGetCompilationState(self.module, &state)) - return ModuleCompileState(state) - - # @staticmethod - # def _check_payload_values(ModuleCompileOptions module_compile_options, PipelineCompileOptions pipeline_compile_options): - # IF _OPTIX_VERSION > 70300: - # # check if the payload values match between the module and pipeline compile options - # pipeline_payload_values = pipeline_compile_options.compile_options.numPayloadValues - # if module_compile_options.payload_types.size() > 0: - # for i in range(module_compile_options.compile_options.numPayloadTypes): - # if pipeline_payload_values != module_compile_options.compile_options.payloadTypes[ - # i].numPayloadValues: - # raise ValueError( - # f"number of payload values in module compile options at index {i} does not match the num_payload_values in the pipeline_compile_options.") - # return - - @classmethod - def create_as_task(cls, - DeviceContext context, - src, - ModuleCompileOptions module_compile_options = ModuleCompileOptions(), - PipelineCompileOptions pipeline_compile_options = PipelineCompileOptions(), - compile_flags=_nvrtc_compile_flags_default, - program_name=None): - """ - Create a module associated with a parallel task. - The function will perform just enough work to instantiate the module. - Everything else will be done by the task on request. - - Parameters - ---------- - context: DeviceContext - The current OptiX context - src: str - Either a string containing the module's source code or PTX or the path to a file containing it. - module_compile_options: ModuleCompileOptions - Compile options of this module - pipeline_compile_options: PipelineCompileOptions - Compile options of the pipeline the module will be used in - compile_flags: list[str], optional - List of compiler flags to use. If omitted, the default flags are used. - program_name: str, optional - The name the program is given internally. Of omitted either the filename is used if given or a default name is used. - - Returns - ------- - - module: Module - The created module - task: Task - The task associated with this module - - """ - cdef Module module = Module(context, None, compile_flags=compile_flags) - cdef const char * c_ptx - cdef unsigned int pipeline_payload_values, i - #cls._check_payload_values(module_compile_options, pipeline_compile_options) - - ptx = cls.compile_cuda_ptx(src, compile_flags, name=program_name) - c_ptx = ptx + @property + def compile_state(self): + cdef OptixModuleCompileState state + with nogil: + optix_check_return(optixModuleGetCompilationState(self.module, &state)) + return ModuleCompileState(state) + + # @staticmethod + # def _check_payload_values(ModuleCompileOptions module_compile_options, PipelineCompileOptions pipeline_compile_options): + # # check if the payload values match between the module and pipeline compile options + # pipeline_payload_values = pipeline_compile_options.compile_options.numPayloadValues + # if module_compile_options.payload_types.size() > 0: + # for i in range(module_compile_options.compile_options.numPayloadTypes): + # if pipeline_payload_values != module_compile_options.compile_options.payloadTypes[ + # i].numPayloadValues: + # raise ValueError( + # f"number of payload values in module compile options at index {i} does not match the num_payload_values in the pipeline_compile_options.") + # return - cdef Task task = Task(module) + @classmethod + def create_as_task(cls, + DeviceContext context, + src, + ModuleCompileOptions module_compile_options = ModuleCompileOptions(), + PipelineCompileOptions pipeline_compile_options = PipelineCompileOptions(), + compile_flags=_nvrtc_compile_flags_default, + program_name=None): + """ + Create a module associated with a parallel task. + The function will perform just enough work to instantiate the module. + Everything else will be done by the task on request. - optix_check_return(optixModuleCreateFromPTXWithTasks(context.c_context, - &module_compile_options.compile_options, - &pipeline_compile_options.compile_options, - c_ptx, - len(ptx) + 1, - NULL, - NULL, - &module.module, - &task.task)) - return module, task + Parameters + ---------- + context: DeviceContext + The current OptiX context + src: str + Either a string containing the module's source code or PTX or the path to a file containing it. + module_compile_options: ModuleCompileOptions + Compile options of this module + pipeline_compile_options: PipelineCompileOptions + Compile options of the pipeline the module will be used in + compile_flags: list[str], optional + List of compiler flags to use. If omitted, the default flags are used. + program_name: str, optional + The name the program is given internally. Of omitted either the filename is used if given or a default name is used. + + Returns + ------- + + module: Module + The created module + task: Task + The task associated with this module + + """ + cdef Module module = Module(context, None, compile_flags=compile_flags) + cdef const char * c_ptx + cdef unsigned int pipeline_payload_values, i + #cls._check_payload_values(module_compile_options, pipeline_compile_options) + + ptx = cls.compile_cuda_ptx(src, compile_flags, name=program_name) + c_ptx = ptx + + cdef Task task = Task(module) + + optix_check_return(optixModuleCreateFromPTXWithTasks(context.c_context, + &module_compile_options.compile_options, + &pipeline_compile_options.compile_options, + c_ptx, + len(ptx) + 1, + NULL, + NULL, + &module.module, + &task.task)) + return module, task @classmethod @@ -379,8 +370,7 @@ cdef class Module(OptixContextObject): """ cdef Module module = cls(context, None) - IF _OPTIX_VERSION > 70300: - cls._check_payload_values(module_compile_options, pipeline_compile_options) + #cls._check_payload_values(module_compile_options, pipeline_compile_options) optix_check_return(optixBuiltinISModuleGet(context.c_context, &module_compile_options.compile_options, &pipeline_compile_options.compile_options, @@ -393,28 +383,53 @@ cdef class Module(OptixContextObject): @staticmethod def compile_cuda_ptx(src, compile_flags=_nvrtc_compile_flags_default, name=None, **kwargs): - if os.path.exists(src): - name = src + """ + Compiles a valid source module into the ptx format. Accepts files containing either source code, ptx, or + optix-ir code, compiles the source code if necessary and returns valid ptx or optix-ir modules. + + Parameters + ---------- + src: A string containing either the file name of the module to be compiled or the CUDA source code directly. + compile_flags: The flags used for the call to the nvptx compiler. If src is compiled already, this is ignored. + name: The name of the compiled module. If src is not an inline string, this is ignored and the file name is used + kwargs: Additional kwargs passed to the NVRTC compiler. See the _NVRTCProgram in the cupy package for details. + + Returns + ------- + ptx: A compiled ptx string or a string in optix-ir format. + """ + compiled = False + if os.path.isfile(src): + # if src is a file + name, ext = os.path.splitext(src) + if ext == '.ptx' or ext == '.optixir': + # if the file points to a compiled module (either in ptx or in optixir format) just return it's contents + compiled = True + # read the file contents with open(src, 'r') as f: src = f.read() - if _is_ptx(src): - return src + elif _is_ptx(src): + # if the source is in ptx format already (e.g. as an inline string) just return it + compiled = True - elif name is None: + if name is None: name = "default_program" - # TODO is there a public API for that? - from cupy.cuda.compiler import _NVRTCProgram as NVRTCProgram - prog = NVRTCProgram(src, name, **kwargs) - flags = list(compile_flags) - # get cuda and optix_include_paths - cuda_include_path = get_cuda_include_path() - optix_include_path = get_local_optix_include_path() - if not os.path.exists(optix_include_path): - warnings.warn("Local optix not found. This usually indicates some installation issue. Attempting" - " to load the global optix includes instead.", RuntimeWarning) - optix_include_path = get_optix_include_path() - flags.extend([f'-I{cuda_include_path}', f'-I{optix_include_path}']) - ptx, _ = prog.compile(flags) - return ptx + if not compiled: + # TODO is there a public API for that? + from cupy.cuda.compiler import _NVRTCProgram as NVRTCProgram + prog = NVRTCProgram(src, name, **kwargs) + flags = list(compile_flags) + # get cuda and optix_include_paths + cuda_include_path = get_cuda_include_path() + optix_include_path = get_local_optix_include_path() + if not os.path.exists(optix_include_path): + warnings.warn("Local optix not found. This usually indicates some installation issue. Attempting" + " to load the global optix includes instead.", RuntimeWarning) + optix_include_path = get_optix_include_path() + flags.extend([f'-I{cuda_include_path}', f'-I{optix_include_path}']) + ptx, _ = prog.compile(flags) + return ptx + else: + return src diff --git a/optix/pipeline.pxd b/optix/pipeline.pxd index 47f46fb..5e072a2 100644 --- a/optix/pipeline.pxd +++ b/optix/pipeline.pxd @@ -23,47 +23,22 @@ cdef extern from "optix_includes.h" nogil: OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS, OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING - IF _OPTIX_VERSION > 70400: # switch to new primitive type flags - cdef enum OptixPrimitiveTypeFlags: - OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, - OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE, - OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, - ELIF _OPTIX_VERSION > 70300: - cdef enum OptixPrimitiveTypeFlags: - OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, - OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, - ELSE: - cdef enum OptixPrimitiveTypeFlags: - OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE - - - IF _OPTIX_VERSION > 70300: # switch to new compile debug level - cdef enum OptixCompileDebugLevel: - OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - OPTIX_COMPILE_DEBUG_LEVEL_NONE, - OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, - OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, - OPTIX_COMPILE_DEBUG_LEVEL_FULL - ELSE: - cdef enum OptixCompileDebugLevel: - OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - OPTIX_COMPILE_DEBUG_LEVEL_NONE, - OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - OPTIX_COMPILE_DEBUG_LEVEL_FULL + cdef enum OptixPrimitiveTypeFlags: + OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, + OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE, + OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, + cdef enum OptixCompileDebugLevel: + OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + OPTIX_COMPILE_DEBUG_LEVEL_NONE, + OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, + OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, + OPTIX_COMPILE_DEBUG_LEVEL_FULL cdef struct OptixPipelineCompileOptions: @@ -74,6 +49,7 @@ cdef extern from "optix_includes.h" nogil: unsigned int exceptionFlags const char * pipelineLaunchParamsVariableName unsigned int usesPrimitiveTypeFlags + int allowOpacityMicromaps cdef struct OptixPipelineLinkOptions: diff --git a/optix/pipeline.pyx b/optix/pipeline.pyx index 43ce269..3a30a93 100644 --- a/optix/pipeline.pyx +++ b/optix/pipeline.pyx @@ -15,6 +15,9 @@ from .shader_binding_table cimport ShaderBindingTable optix_init() +__all__ = ['CompileDebugLevel', 'ExceptionFlags', 'TraversableGraphFlags', 'PrimitiveTypeFlags', + 'PipelineCompileOptions', 'PipelineLinkOptions', 'Pipeline'] + class ExceptionFlags(IntFlag): """ @@ -45,34 +48,20 @@ class PrimitiveTypeFlags(IntFlag): ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR - - # switch to new primitive type flags - IF _OPTIX_VERSION > 70300: # switch to new compile debug level flags - ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM - IF _OPTIX_VERSION > 70400: - SPHERE = OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE + ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM + SPHERE = OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE TRIANGLE = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE # fixes negative number error -IF _OPTIX_VERSION > 70300: # switch to new compile debug level flags - class CompileDebugLevel(IntEnum): - """ - Wraps the OptixCompileDebugLevel enum. - """ - DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, - MINIMAL = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, - MODERATE = OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, - FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL -ELSE: - class CompileDebugLevel(IntEnum): - """ - Wraps the OptixCompileDebugLevel enum. - """ - DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, - LINEINFO = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL +class CompileDebugLevel(IntEnum): + """ + Wraps the OptixCompileDebugLevel enum. + """ + DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, + MINIMAL = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, + MODERATE = OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, + FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL cdef class PipelineCompileOptions(OptixObject): @@ -88,7 +77,8 @@ cdef class PipelineCompileOptions(OptixObject): num_attribute_values = 0, exception_flags = ExceptionFlags.NONE, pipeline_launch_params_variable_name = "params", - uses_primitive_type_flags = PrimitiveTypeFlags.DEFAULT): + uses_primitive_type_flags = PrimitiveTypeFlags.DEFAULT, + allow_opacity_micromaps=False): self.uses_motion_blur = uses_motion_blur self.traversable_graph_flags = traversable_graph_flags self.num_payload_values = num_payload_values @@ -96,6 +86,7 @@ cdef class PipelineCompileOptions(OptixObject): self.exception_flags = exception_flags self.pipeline_launch_params_variable_name = pipeline_launch_params_variable_name self.uses_primitive_type_flags = uses_primitive_type_flags + self.allow_opacity_micromaps = allow_opacity_micromaps @property def uses_motion_blur(self): @@ -156,6 +147,14 @@ cdef class PipelineCompileOptions(OptixObject): def uses_primitive_type_flags(self, flags): self.compile_options.usesPrimitiveTypeFlags = flags.value + @property + def allow_opacity_micromaps(self): + return self.compile_options.allowOpacityMicromaps + + @allow_opacity_micromaps.setter + def allow_opacity_micromaps(self, allow): + self.compile_options.allowOpacityMicromaps = allow + @property def c_obj(self): return &self.compile_options diff --git a/optix/program_group.pyx b/optix/program_group.pyx index 2d95c24..09523fc 100644 --- a/optix/program_group.pyx +++ b/optix/program_group.pyx @@ -7,6 +7,8 @@ from libc.string cimport memset from enum import IntEnum optix_init() +__all__ = ['ProgramGroup', 'ProgramGroupKind'] + class ProgramGroupKind(IntEnum): """ Wraps the OptixProgramGroupKind enum diff --git a/optix/shader_binding_table.pyx b/optix/shader_binding_table.pyx index 4577339..8d6cf35 100644 --- a/optix/shader_binding_table.pyx +++ b/optix/shader_binding_table.pyx @@ -8,6 +8,9 @@ import numpy as np optix_init() +__all__ = ['ShaderBindingTable'] + + cdef class ShaderBindingTable(OptixObject): """ Represents a ShaderBindingTable, containing data used by the various programs in the OptiX Pipeline. diff --git a/optix/struct.pyx b/optix/struct.pyx index 133dac1..3c15488 100644 --- a/optix/struct.pyx +++ b/optix/struct.pyx @@ -10,6 +10,8 @@ from collections.abc import Mapping optix_init() +__all__ = ['SbtRecord', 'LaunchParamsRecord'] + def _aligned_itemsize( formats, alignment ): names = [] for i in range( len(formats ) ): diff --git a/setup.py b/setup.py index 79fa148..7dcb49b 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,12 @@ -from struct import pack -from setuptools import setup, Extension, find_packages, find_namespace_packages +from setuptools import setup, Extension, find_packages from Cython.Build import cythonize import re import os from pathlib import Path import shutil +import numpy +OPTIX_COMPATIBLE_VERSION = (7, 6) # standalone import of a module (https://stackoverflow.com/a/58423785) def import_module_from_path(path): @@ -33,8 +34,10 @@ def import_module_from_path(path): optix_include_path = util.get_optix_include_path() print("Found cuda includes at", cuda_include_path) print("Found optix includes at", optix_include_path) -if cuda_include_path is None or optix_include_path is None: - raise RuntimeError("Cuda or optix not found in the system") +if cuda_include_path is None: + raise RuntimeError("CUDA not found in the system, but is required to build this package.") +if optix_include_path is None: + raise RuntimeError("OptiX not found in the system, but is required to build this package.") optix_version_re = re.compile(r'.*OPTIX_VERSION +(\d{5})') # get the optix version from the header with open(Path(optix_include_path) / "optix.h", 'r') as f: @@ -45,10 +48,11 @@ def import_module_from_path(path): optix_version_minor = (optix_version % 10000) // 100 optix_version_micro = optix_version % 100 -print(f"Found OptiX version {optix_version_major}.{optix_version_minor}.{optix_version_micro}.") +if (optix_version_major, optix_version_minor) != OPTIX_COMPATIBLE_VERSION: + raise ValueError(f"Found unsupported optix version {optix_version_major}.{optix_version_minor}.{optix_version_micro}. This package" + f"requires an optix version of {OPTIX_COMPATIBLE_VERSION[0]}.{OPTIX_COMPATIBLE_VERSION[1]}.x.") cython_compile_env = { - '_OPTIX_VERSION': optix_version, '_OPTIX_VERSION_MAJOR': optix_version_major, '_OPTIX_VERSION_MINOR': optix_version_minor, '_OPTIX_VERSION_MICRO': optix_version_micro @@ -60,9 +64,9 @@ def import_module_from_path(path): libraries.append('advapi32') extensions = [Extension("*", ["optix/*.pyx"], - include_dirs=[cuda_include_path, optix_include_path], libraries=libraries)] + include_dirs=[cuda_include_path, optix_include_path, numpy.get_include()], libraries=libraries)] extensions = cythonize(extensions, language_level="3", - compile_time_env=cython_compile_env, build_dir="build") + compile_time_env=cython_compile_env, build_dir="build", annotate=True) with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() @@ -80,6 +84,7 @@ def glob_fix(package_name, glob): return [str(path.relative_to(package_path)) for path in package_path.glob(glob)] + class custom_bdist_wheel(_bdist_wheel): def finalize_options(self): _bdist_wheel.finalize_options(self)