diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs index 39aee1657136f6..c9db076c570f0c 100644 --- a/crates/bevy_core_pipeline/src/core_3d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs @@ -16,9 +16,11 @@ pub mod graph { #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] pub enum Node3d { MsaaWriteback, + OcclusionCullingDepthPrepass, Prepass, DeferredPrepass, CopyDeferredLightingId, + EarlyDownsampleDepthBuffer, EndPrepasses, StartMainPass, MainOpaquePass, @@ -31,6 +33,7 @@ pub mod graph { Fxaa, Upscaling, ContrastAdaptiveSharpening, + LateDownsampleDepthBuffer, EndMainPassPostProcessing, } } @@ -73,6 +76,7 @@ use nonmax::NonMaxU32; use crate::{ core_3d::main_transmissive_pass_3d_node::MainTransmissivePass3dNode, + culling::HierarchicalDepthBuffer, deferred::{ copy_lighting_id::CopyDeferredLightingIdNode, node::DeferredGBufferPrepassNode, AlphaMask3dDeferred, Opaque3dDeferred, DEFERRED_LIGHTING_PASS_ID_FORMAT, @@ -495,18 +499,27 @@ pub fn extract_camera_prepass_phase( Has, Has, Has, + Has, ), With, >, >, ) { - for (entity, camera, depth_prepass, normal_prepass, motion_vector_prepass, deferred_prepass) in - cameras_3d.iter() + for ( + entity, + camera, + depth_prepass, + normal_prepass, + motion_vector_prepass, + deferred_prepass, + hierarchical_depth_buffer, + ) in cameras_3d.iter() { if camera.is_active { let mut entity = commands.get_or_spawn(entity); - if depth_prepass || normal_prepass || motion_vector_prepass { + if depth_prepass || normal_prepass || motion_vector_prepass || hierarchical_depth_buffer + { entity.insert(( BinnedRenderPhase::::default(), BinnedRenderPhase::::default(), @@ -542,7 +555,13 @@ pub fn prepare_core_3d_depth_textures( msaa: Res, render_device: Res, views_3d: Query< - (Entity, &ExtractedCamera, Option<&DepthPrepass>, &Camera3d), + ( + Entity, + &ExtractedCamera, + Has, + Has, + &Camera3d, + ), ( With>, With>, @@ -552,13 +571,17 @@ pub fn prepare_core_3d_depth_textures( >, ) { let mut render_target_usage = HashMap::default(); - for (_, camera, depth_prepass, camera_3d) in &views_3d { + for (_, camera, depth_prepass, hierarchical_depth_buffer, camera_3d) in &views_3d { // Default usage required to write to the depth texture let mut usage: TextureUsages = camera_3d.depth_texture_usages.into(); - if depth_prepass.is_some() { - // Required to read the output of the prepass + // Required to read the output of the prepass + if depth_prepass { usage |= TextureUsages::COPY_SRC; } + // Required to build a hierarchical Z-buffer + if hierarchical_depth_buffer { + usage |= TextureUsages::COPY_SRC | TextureUsages::TEXTURE_BINDING; + } render_target_usage .entry(camera.target.clone()) .and_modify(|u| *u |= usage) @@ -566,7 +589,7 @@ pub fn prepare_core_3d_depth_textures( } let mut textures = HashMap::default(); - for (entity, camera, _, camera_3d) in &views_3d { + for (entity, camera, _, _, camera_3d) in &views_3d { let Some(physical_target_size) = camera.physical_target_size else { continue; }; @@ -730,6 +753,7 @@ pub fn prepare_prepass_textures( Has, Has, Has, + Has, ), Or<( With>, @@ -744,8 +768,15 @@ pub fn prepare_prepass_textures( let mut deferred_textures = HashMap::default(); let mut deferred_lighting_id_textures = HashMap::default(); let mut motion_vectors_textures = HashMap::default(); - for (entity, camera, depth_prepass, normal_prepass, motion_vector_prepass, deferred_prepass) in - &views_3d + for ( + entity, + camera, + depth_prepass, + normal_prepass, + motion_vector_prepass, + deferred_prepass, + hierarchical_depth_buffer, + ) in &views_3d { let Some(physical_target_size) = camera.physical_target_size else { continue; @@ -757,7 +788,7 @@ pub fn prepare_prepass_textures( height: physical_target_size.y, }; - let cached_depth_texture = depth_prepass.then(|| { + let cached_depth_texture = (depth_prepass || hierarchical_depth_buffer).then(|| { depth_textures .entry(camera.target.clone()) .or_insert_with(|| { diff --git a/crates/bevy_core_pipeline/src/culling/downsample_depth.wgsl b/crates/bevy_core_pipeline/src/culling/downsample_depth.wgsl new file mode 100644 index 00000000000000..fbb70bf31679ff --- /dev/null +++ b/crates/bevy_core_pipeline/src/culling/downsample_depth.wgsl @@ -0,0 +1,16 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +@group(0) @binding(0) var input_depth: texture_2d; +@group(0) @binding(1) var samplr: sampler; + +/// Performs a 2x2 downsample on a depth texture to generate the next mip level of a hierarchical depth buffer. + +@fragment +fn downsample_depth(in: FullscreenVertexOutput) -> @location(0) vec4 { + let depth_quad = textureGather(0, input_depth, samplr, in.uv); + let downsampled_depth = min( + min(depth_quad.x, depth_quad.y), + min(depth_quad.z, depth_quad.w), + ); + return vec4(downsampled_depth, 0.0, 0.0, 0.0); +} diff --git a/crates/bevy_core_pipeline/src/culling/mod.rs b/crates/bevy_core_pipeline/src/culling/mod.rs new file mode 100644 index 00000000000000..bac2918405f053 --- /dev/null +++ b/crates/bevy_core_pipeline/src/culling/mod.rs @@ -0,0 +1,776 @@ +//! Functionality relevant to GPU occlusion culling. +//! +//! Currently, there's no support for GPU occlusion culling in Bevy; however, +//! these functions lay the groundwork for one. + +use bevy_app::{App, Plugin}; +use bevy_asset::{load_internal_asset, Handle}; +use bevy_color::LinearRgba; +use bevy_derive::{Deref, DerefMut}; +use bevy_ecs::{ + component::Component, + entity::Entity, + query::{Has, QueryItem, With, Without}, + schedule::IntoSystemConfigs as _, + system::{lifetimeless::Read, Commands, Query, Res, ResMut, Resource}, + world::{FromWorld, World}, +}; +use bevy_math::{UVec2, Vec4Swizzles as _}; +use bevy_reflect::Reflect; +use bevy_render::{ + camera::ExtractedCamera, + extract_component::{ExtractComponent, ExtractComponentPlugin}, + render_graph::{NodeRunError, RenderGraphApp, RenderGraphContext, ViewNode, ViewNodeRunner}, + render_phase::BinnedRenderPhase, + render_resource::{ + binding_types, BindGroup, BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries, + CachedRenderPipelineId, ColorTargetState, ColorWrites, CommandEncoderDescriptor, Extent3d, + FragmentState, LoadOp, MultisampleState, Operations, PipelineCache, PrimitiveState, + RenderPassColorAttachment, RenderPassDescriptor, RenderPipelineDescriptor, Sampler, + SamplerBindingType, SamplerDescriptor, Shader, ShaderStages, StoreOp, TextureAspect, + TextureDescriptor, TextureDimension, TextureFormat, TextureSampleType, TextureUsages, + TextureView, TextureViewDescriptor, TextureViewDimension, + }, + renderer::{RenderContext, RenderDevice}, + texture::{CachedTexture, TextureCache}, + view::{ExtractedView, ViewDepthTexture}, + Render, RenderApp, RenderSet, +}; +use bevy_utils::{prelude::default, previous_power_of_2}; + +use crate::{ + core_3d::graph::{Core3d, Node3d}, + fullscreen_vertex_shader, + prepass::{node::PrepassRunner, AlphaMask3dPrepass, Opaque3dPrepass}, +}; + +pub const DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = + Handle::weak_from_u128(11295947011526841734); +pub const RESOLVE_DEPTH_SHADER_HANDLE: Handle = + Handle::weak_from_u128(15894811689345116803); + +/// Supplies functionality relating to GPU occlusion culling. +/// +/// Bevy doesn't currently support GPU occlusion culling outside of meshlets, +/// but this functionality may be useful for those wishing to implement their +/// own occlusion culling systems. +pub struct OcclusionCullingPlugin; + +impl Plugin for OcclusionCullingPlugin { + fn build(&self, app: &mut App) { + load_internal_asset!( + app, + DOWNSAMPLE_DEPTH_SHADER_HANDLE, + "downsample_depth.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + RESOLVE_DEPTH_SHADER_HANDLE, + "resolve_depth.wgsl", + Shader::from_wgsl + ); + + app.add_plugins(ExtractComponentPlugin::::default()); + + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::() + .init_resource::() + .add_systems( + Render, + ( + ( + prepare_downsample_depth_pipeline, + prepare_resolve_depth_pipeline, + ) + .in_set(RenderSet::Prepare), + prepare_culling_view_resources.in_set(RenderSet::PrepareBindGroups), + ), + ); + render_app + .add_render_graph_node::>( + Core3d, + Node3d::EarlyDownsampleDepthBuffer, + ) + .add_render_graph_node::>( + Core3d, + Node3d::LateDownsampleDepthBuffer, + ) + .add_render_graph_node::>( + Core3d, + Node3d::OcclusionCullingDepthPrepass, + ) + .add_render_graph_edges( + Core3d, + ( + Node3d::OcclusionCullingDepthPrepass, + Node3d::EarlyDownsampleDepthBuffer, + Node3d::Prepass, + ), + ) + .add_render_graph_edges( + Core3d, + ( + Node3d::EndMainPass, + Node3d::LateDownsampleDepthBuffer, + Node3d::EndMainPassPostProcessing, + ), + ); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .init_resource::() + .init_resource::(); + } +} + +/// Place this component on a camera to request that Bevy build a hierarchical +/// depth buffer, which can be used for two-phase occlusion culling. +#[derive(Component, Reflect)] +pub struct HierarchicalDepthBuffer; + +impl ExtractComponent for HierarchicalDepthBuffer { + type QueryData = (); + + type QueryFilter = (); + + type Out = HierarchicalDepthBuffer; + + fn extract_component(_: QueryItem<'_, Self::QueryData>) -> Option { + Some(HierarchicalDepthBuffer) + } +} + +/// A render graph node for running the prepass early, before downsampling. +#[derive(Default)] +pub struct OcclusionCullingDepthPrepassNode; + +impl ViewNode for OcclusionCullingDepthPrepassNode { + type ViewQuery = ( + Read, + Read>, + Read>, + Read, + Has, + ); + + fn run<'w>( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + ( + camera, + opaque_prepass_phase, + alpha_mask_prepass_phase, + view_depth_texture, + hierarchical_depth_buffer, + ): QueryItem<'w, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + if !hierarchical_depth_buffer { + return Ok(()); + } + + let diagnostics = render_context.diagnostic_recorder(); + let prepass_runner = PrepassRunner::new(view_depth_texture, None); + let view_entity = graph.view_entity(); + + render_context.add_command_buffer_generation_task(move |render_device| { + let mut command_encoder = + render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("occlusion culling depth prepass command encoder"), + }); + + prepass_runner.run_prepass( + world, + &render_device, + diagnostics, + &mut command_encoder, + view_entity, + camera, + opaque_prepass_phase, + alpha_mask_prepass_phase, + "occlusion culling depth prepass", + ); + + command_encoder.finish() + }); + + Ok(()) + } +} + +struct DownsampleDebugLabels { + downsample_group: &'static str, + downsample_pass: &'static str, + resolve_pass: &'static str, +} + +/// A render graph node for generating a downsampled depth buffer. +/// +/// This pass runs right after the occlusion culling prepass, before the main +/// phase or the prepass if any. +#[derive(Default)] +pub struct EarlyDownsampleDepthBufferNode; + +impl ViewNode for EarlyDownsampleDepthBufferNode { + type ViewQuery = ( + Option>, + Has, + ); + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + (culling_view_resources, gpu_occlusion_culling): QueryItem<'w, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + static DEBUG_LABELS: DownsampleDebugLabels = DownsampleDebugLabels { + downsample_group: "early downsample depth", + downsample_pass: "early downsample depth pass", + resolve_pass: "early resolve depth pass", + }; + + run_downsample_depth_buffer_node( + render_context, + culling_view_resources, + gpu_occlusion_culling, + world, + &DEBUG_LABELS, + ) + } +} + +/// A render graph node for generating a downsampled depth buffer. +/// +/// This pass runs at the end of the frame, in preparation for the next frame. +#[derive(Default)] +pub struct LateDownsampleDepthBufferNode; + +impl ViewNode for LateDownsampleDepthBufferNode { + type ViewQuery = ( + Option>, + Has, + ); + + fn run<'w>( + &self, + _: &mut RenderGraphContext, + render_context: &mut RenderContext<'w>, + (culling_view_resources, gpu_occlusion_culling): QueryItem<'w, Self::ViewQuery>, + world: &'w World, + ) -> Result<(), NodeRunError> { + static DEBUG_LABELS: DownsampleDebugLabels = DownsampleDebugLabels { + downsample_group: "late downsample depth", + downsample_pass: "late downsample depth pass", + resolve_pass: "late resolve depth pass", + }; + + run_downsample_depth_buffer_node( + render_context, + culling_view_resources, + gpu_occlusion_culling, + world, + &DEBUG_LABELS, + ) + } +} + +fn run_downsample_depth_buffer_node( + render_context: &mut RenderContext, + culling_view_resources: Option<&HierarchicalDepthBufferViewResources>, + gpu_occlusion_culling: bool, + world: &World, + debug_labels: &DownsampleDebugLabels, +) -> Result<(), NodeRunError> { + let pipeline_cache = world.resource::(); + if !gpu_occlusion_culling { + return Ok(()); + } + let Some(culling_view_resources) = culling_view_resources else { + return Ok(()); + }; + let (Some(downsample_depth_pipeline), Some(resolve_depth_pipeline)) = ( + **world.resource::(), + **world.resource::(), + ) else { + return Ok(()); + }; + + // If the depth buffer is multisampled, resolve it now. + if let Some(multisample_resources) = &culling_view_resources.multisample_resources { + resolve_depth_buffer( + render_context, + multisample_resources, + pipeline_cache, + resolve_depth_pipeline, + debug_labels, + ); + } + + // Downsample the depth buffer repeatedly to produce the hierarchical + // Z-buffer. + downsample_depth( + render_context, + &culling_view_resources.depth_pyramid_mips, + &culling_view_resources.downsample_depth_bind_groups, + pipeline_cache, + downsample_depth_pipeline, + debug_labels, + ); + + Ok(()) +} + +/// The [`CachedRenderPipelineId`] for the shader that downsamples the depth +/// buffer to produce a hierarchical Z-buffer. +#[derive(Resource, Deref, DerefMut, Default)] +pub struct DownsampleDepthPipelineId(Option); + +/// The [`CachedRenderPipelineId`] for the multisampled depth buffer resolution +/// shader. +#[derive(Resource, Deref, DerefMut, Default)] +pub struct ResolveDepthPipelineId(Option); + +#[derive(Resource)] +pub struct DownsampleDepthPipeline { + bind_group_layout: BindGroupLayout, + depth_pyramid_sampler: Sampler, +} + +/// Holds the bind group layout for the shader that resolves multisampled depth +/// buffers in preparation for hierarchical Z-buffer building. +#[derive(Resource)] +pub struct ResolveDepthPipeline { + /// The bind group layout for the multisampled depth buffer resolution + /// shader. + bind_group_layout: BindGroupLayout, +} + +/// A component, attached to each view in the render world that has a +/// [`HierarchicalDepthBuffer`] component, that holds the generated hierarchical +/// Z buffer for that view. +#[derive(Component)] +pub struct HierarchicalDepthBufferViewResources { + /// The actual hierarchical Z buffer. + /// + /// This is a mipmapped `R32Float` texture. + pub depth_pyramid: CachedTexture, + /// One [`TextureView`] for each mip level of the texture. + depth_pyramid_mips: Box<[TextureView]>, + /// Bind groups for each downsampling operation. + /// + /// There will be one such operation per mip level. + downsample_depth_bind_groups: Box<[BindGroup]>, + /// If the depth buffer is multisampled, holds information needed to resolve + /// it. + multisample_resources: Option, +} + +/// Information needed to resolve a multisampled depth buffer. +struct MultisampleCullingViewResources { + /// The non-multisampled texture that the multisampled depth buffer is to be + /// resolved to. + resolved_depth_texture: CachedTexture, + /// The bind group for the shader that does this resolving. + resolve_depth_bind_group: BindGroup, +} + +impl FromWorld for DownsampleDepthPipeline { + fn from_world(world: &mut World) -> Self { + let render_device = world.resource_mut::(); + + DownsampleDepthPipeline { + bind_group_layout: render_device.create_bind_group_layout( + "downsample depth bind group layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + binding_types::texture_2d(TextureSampleType::Float { filterable: false }), + binding_types::sampler(SamplerBindingType::NonFiltering), + ), + ), + ), + depth_pyramid_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("depth pyramid sampler"), + ..default() + }), + } + } +} + +impl FromWorld for ResolveDepthPipeline { + fn from_world(world: &mut World) -> Self { + let render_device = world.resource_mut::(); + + ResolveDepthPipeline { + bind_group_layout: render_device.create_bind_group_layout( + "resolve depth bind group layout", + &BindGroupLayoutEntries::single( + ShaderStages::FRAGMENT, + binding_types::texture_2d_multisampled(TextureSampleType::Float { + filterable: false, + }), + ), + ), + } + } +} + +/// Creates the pipeline needed to produce a hierarchical Z-buffer. +pub fn prepare_downsample_depth_pipeline( + pipeline_cache: ResMut, + mut downsample_depth_pipeline_id: ResMut, + downsample_depth_pipeline: Res, +) { + if downsample_depth_pipeline_id.is_some() { + return; + } + + let render_pipeline_descriptor = RenderPipelineDescriptor { + label: Some("downsample depth".into()), + layout: vec![downsample_depth_pipeline.bind_group_layout.clone()], + push_constant_ranges: vec![], + vertex: fullscreen_vertex_shader::fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "downsample_depth".into(), + targets: vec![Some(ColorTargetState { + format: TextureFormat::R32Float, + blend: None, + write_mask: ColorWrites::ALL, + })], + }), + }; + + **downsample_depth_pipeline_id = + Some(pipeline_cache.queue_render_pipeline(render_pipeline_descriptor)); +} + +/// Creates the pipeline that resolves multisampled depth buffers, taking the +/// minimum depth of each pixel sample. +/// +/// In theory, we could use a Vulkan 1.3 extension [1] for this, but we can't +/// rely on that being available, and it isn't exposed through `wgpu` anyway. So +/// we spin up the raster hardware and do a draw instead. +/// +/// [1]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkSubpassDescriptionDepthStencilResolveKHR.html +pub fn prepare_resolve_depth_pipeline( + pipeline_cache: ResMut, + mut resolve_depth_pipeline_id: ResMut, + resolve_depth_pipeline: Res, +) { + if resolve_depth_pipeline_id.is_some() { + return; + } + + let base_fragment_state = FragmentState { + shader: RESOLVE_DEPTH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "main".into(), + targets: vec![Some(ColorTargetState { + format: TextureFormat::R32Float, + blend: None, + write_mask: ColorWrites::ALL, + })], + }; + + let base_render_pipeline_descriptor = RenderPipelineDescriptor { + label: Some("resolve depth".into()), + layout: vec![], + push_constant_ranges: vec![], + vertex: fullscreen_vertex_shader::fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: None, + }; + + let multisample_render_pipeline_descriptor = RenderPipelineDescriptor { + layout: vec![resolve_depth_pipeline.bind_group_layout.clone()], + fragment: Some(FragmentState { + shader_defs: vec![], + ..base_fragment_state + }), + ..base_render_pipeline_descriptor + }; + + **resolve_depth_pipeline_id = + Some(pipeline_cache.queue_render_pipeline(multisample_render_pipeline_descriptor)); +} + +/// A system that prepares the downsample and resolve pipelines for hierarchical +/// Z buffer creation. +pub fn prepare_culling_view_resources( + mut commands: Commands, + views: Query< + (Entity, &ExtractedView, &ViewDepthTexture), + ( + With, + Without, + ), + >, + render_device: ResMut, + mut texture_cache: ResMut, + downsample_depth_pipeline: Res, + resolve_depth_pipeline: Res, +) { + // We do this for each view because views may have different depth buffers. + for (view_entity, extracted_view, view_depth_texture) in views.iter() { + // Determine the size and number of mips. + let depth_size = Extent3d { + // If not a power of 2, round down to the nearest power of 2 to + // ensure depth is conservative. + width: previous_power_of_2(extracted_view.viewport.z), + height: previous_power_of_2(extracted_view.viewport.w), + depth_or_array_layers: 1, + }; + let depth_mip_count = depth_size.width.max(depth_size.height).ilog2() + 1; + + // Create the depth pyramid. + let depth_pyramid = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("depth pyramid"), + size: depth_size, + mip_level_count: depth_mip_count, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + // If we have a multisampled depth texture, prepare the intermediate + // buffer for resolution. + let multisample_resources = (view_depth_texture.texture.sample_count() > 1).then(|| { + prepare_multisample_culling_view_resources( + &mut texture_cache, + &render_device, + extracted_view.viewport.zw(), + &resolve_depth_pipeline, + view_depth_texture, + ) + }); + + // Create the views for the mip levels and the bind groups for each pass. + let depth_pyramid_mips = create_downsample_depth_pyramid_mips(&depth_pyramid); + let downsample_depth_bind_groups = create_downsample_depth_bind_groups( + &render_device, + &downsample_depth_pipeline, + &multisample_resources, + view_depth_texture, + &depth_pyramid_mips, + ); + + // Record the results. + commands + .entity(view_entity) + .insert(HierarchicalDepthBufferViewResources { + depth_pyramid, + depth_pyramid_mips, + downsample_depth_bind_groups, + multisample_resources, + }); + } +} + +/// Creates [`MultisampleCullingViewResources`] for a single view. +/// +/// This is only used for views that render to multisampled targets. +fn prepare_multisample_culling_view_resources( + texture_cache: &mut TextureCache, + render_device: &RenderDevice, + depth_buffer_size: UVec2, + resolve_depth_pipeline: &ResolveDepthPipeline, + view_depth_texture: &ViewDepthTexture, +) -> MultisampleCullingViewResources { + // Create the texture. + let resolved_depth_texture = texture_cache.get( + render_device, + TextureDescriptor { + label: Some("resolved depth"), + size: Extent3d { + width: depth_buffer_size.x, + height: depth_buffer_size.y, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + + // Create the bind group. + let resolve_depth_bind_group = render_device.create_bind_group( + "resolve depth bind group", + &resolve_depth_pipeline.bind_group_layout, + &BindGroupEntries::single(view_depth_texture.view()), + ); + + MultisampleCullingViewResources { + resolved_depth_texture, + resolve_depth_bind_group, + } +} + +/// Creates the texture views for each mip level of the depth pyramid. +fn create_downsample_depth_pyramid_mips(depth_pyramid: &CachedTexture) -> Box<[TextureView]> { + (0..depth_pyramid.texture.mip_level_count()) + .map(|i| { + depth_pyramid.texture.create_view(&TextureViewDescriptor { + label: Some("depth pyramid texture view"), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: i, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: None, + }) + }) + .collect() +} + +/// Creates the bind groups for each mip level of the depth. +fn create_downsample_depth_bind_groups( + render_device: &RenderDevice, + downsample_depth_pipeline: &DownsampleDepthPipeline, + multisample_culling_resources: &Option, + view_depth_texture: &ViewDepthTexture, + depth_pyramid_mips: &[TextureView], +) -> Box<[BindGroup]> { + (0..depth_pyramid_mips.len()) + .map(|i| { + if i == 0 { + render_device.create_bind_group( + "downsample depth bind group (initial)", + &downsample_depth_pipeline.bind_group_layout, + &BindGroupEntries::sequential(( + match multisample_culling_resources { + Some(multisample_resources) => { + &multisample_resources.resolved_depth_texture.default_view + } + None => view_depth_texture.view(), + }, + &downsample_depth_pipeline.depth_pyramid_sampler, + )), + ) + } else { + render_device.create_bind_group( + "downsample depth bind group", + &downsample_depth_pipeline.bind_group_layout, + &BindGroupEntries::sequential(( + &depth_pyramid_mips[i - 1], + &downsample_depth_pipeline.depth_pyramid_sampler, + )), + ) + } + }) + .collect() +} + +fn resolve_depth_buffer( + render_context: &mut RenderContext, + multisample_culling_resources: &MultisampleCullingViewResources, + pipeline_cache: &PipelineCache, + resolve_depth_pipeline_id: CachedRenderPipelineId, + debug_labels: &DownsampleDebugLabels, +) { + let Some(resolve_depth_pipeline) = + pipeline_cache.get_render_pipeline(resolve_depth_pipeline_id) + else { + return; + }; + + let resolve_depth_pass = RenderPassDescriptor { + label: Some(debug_labels.resolve_pass), + color_attachments: &[Some(RenderPassColorAttachment { + view: &multisample_culling_resources + .resolved_depth_texture + .default_view, + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(LinearRgba::BLACK.into()), + store: StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + { + let mut resolve_depth_pass = render_context.begin_tracked_render_pass(resolve_depth_pass); + resolve_depth_pass.set_bind_group( + 0, + &multisample_culling_resources.resolve_depth_bind_group, + &[], + ); + resolve_depth_pass.set_render_pipeline(resolve_depth_pipeline); + resolve_depth_pass.draw(0..3, 0..1); + } +} + +fn downsample_depth( + render_context: &mut RenderContext, + depth_pyramid_mips: &[TextureView], + downsample_depth_bind_groups: &[BindGroup], + pipeline_cache: &PipelineCache, + downsample_depth_pipeline_id: CachedRenderPipelineId, + debug_labels: &DownsampleDebugLabels, +) { + let Some(downsample_pipeline) = + pipeline_cache.get_render_pipeline(downsample_depth_pipeline_id) + else { + return; + }; + + render_context + .command_encoder() + .push_debug_group(debug_labels.downsample_group); + + for (depth_pyramid_mip, downsample_depth_bind_group) in + depth_pyramid_mips.iter().zip(downsample_depth_bind_groups) + { + let downsample_pass = RenderPassDescriptor { + label: Some(debug_labels.downsample_pass), + color_attachments: &[Some(RenderPassColorAttachment { + view: depth_pyramid_mip, + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(LinearRgba::BLACK.into()), + store: StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + let mut downsample_pass = render_context.begin_tracked_render_pass(downsample_pass); + downsample_pass.set_bind_group(0, downsample_depth_bind_group, &[]); + downsample_pass.set_render_pipeline(downsample_pipeline); + downsample_pass.draw(0..3, 0..1); + } + + render_context.command_encoder().pop_debug_group(); +} diff --git a/crates/bevy_core_pipeline/src/culling/resolve_depth.wgsl b/crates/bevy_core_pipeline/src/culling/resolve_depth.wgsl new file mode 100644 index 00000000000000..1727990bb811bd --- /dev/null +++ b/crates/bevy_core_pipeline/src/culling/resolve_depth.wgsl @@ -0,0 +1,21 @@ +// Resolves a multisample depth buffer with the min operation. +// +// This is a workaround for multisample depth resolve not being available in +// `wgpu`. + +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +@group(0) @binding(0) var input_depth: texture_multisampled_2d; + +@fragment +fn main(in: FullscreenVertexOutput) -> @location(0) vec4 { + let coords = vec2(floor(in.uv * vec2(textureDimensions(input_depth)))); + + // Take the minimum of every sample. + var depth = 1.0f / 0.0f; // infinity + for (var sample = 0; sample < i32(textureNumSamples(input_depth)); sample += 1) { + depth = min(depth, textureLoad(input_depth, coords, sample).r); + } + + return vec4(depth, 0.0, 0.0, 0.0); +} diff --git a/crates/bevy_core_pipeline/src/lib.rs b/crates/bevy_core_pipeline/src/lib.rs index 9bb44c4e33116d..f82e1605cc4b3c 100644 --- a/crates/bevy_core_pipeline/src/lib.rs +++ b/crates/bevy_core_pipeline/src/lib.rs @@ -12,6 +12,7 @@ pub mod bloom; pub mod contrast_adaptive_sharpening; pub mod core_2d; pub mod core_3d; +pub mod culling; pub mod deferred; pub mod fullscreen_vertex_shader; pub mod fxaa; @@ -50,6 +51,7 @@ use crate::{ contrast_adaptive_sharpening::CASPlugin, core_2d::Core2dPlugin, core_3d::Core3dPlugin, + culling::OcclusionCullingPlugin, deferred::copy_lighting_id::CopyDeferredLightingIdPlugin, fullscreen_vertex_shader::FULLSCREEN_SHADER_HANDLE, fxaa::FxaaPlugin, @@ -89,6 +91,7 @@ impl Plugin for CorePipelinePlugin { BloomPlugin, FxaaPlugin, CASPlugin, + OcclusionCullingPlugin, )); } } diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs index 01fca93ddc2543..842cadba79db31 100644 --- a/crates/bevy_core_pipeline/src/prepass/mod.rs +++ b/crates/bevy_core_pipeline/src/prepass/mod.rs @@ -67,7 +67,9 @@ pub struct DeferredPrepass; #[derive(Component)] pub struct ViewPrepassTextures { /// The depth texture generated by the prepass. - /// Exists only if [`DepthPrepass`] is added to the [`ViewTarget`](bevy_render::view::ViewTarget) + /// Exists only if [`DepthPrepass`] or + /// [`crate::culling::HierarchicalDepthBuffer`] is added to the + /// [`ViewTarget`](bevy_render::view::ViewTarget) pub depth: Option, /// The normals texture generated by the prepass. /// Exists only if [`NormalPrepass`] is added to the [`ViewTarget`](bevy_render::view::ViewTarget) diff --git a/crates/bevy_core_pipeline/src/prepass/node.rs b/crates/bevy_core_pipeline/src/prepass/node.rs index 74de568e2bdfb7..92441db0f461b4 100644 --- a/crates/bevy_core_pipeline/src/prepass/node.rs +++ b/crates/bevy_core_pipeline/src/prepass/node.rs @@ -5,8 +5,11 @@ use bevy_render::{ diagnostic::RecordDiagnostics, render_graph::{NodeRunError, RenderGraphContext, ViewNode}, render_phase::{BinnedRenderPhase, TrackedRenderPass}, - render_resource::{CommandEncoderDescriptor, RenderPassDescriptor, StoreOp}, - renderer::RenderContext, + render_resource::{ + CommandEncoder, CommandEncoderDescriptor, RenderPassColorAttachment, + RenderPassDepthStencilAttachment, RenderPassDescriptor, StoreOp, + }, + renderer::{RenderContext, RenderDevice}, view::ViewDepthTexture, }; #[cfg(feature = "trace")] @@ -20,6 +23,14 @@ use super::{AlphaMask3dPrepass, DeferredPrepass, Opaque3dPrepass, ViewPrepassTex #[derive(Default)] pub struct PrepassNode; +/// A helper type that runs the prepass phases. +pub(crate) struct PrepassRunner<'a> { + /// The color attachment where the prepass will be rendered to. + color_attachments: Vec>>, + /// The depth/stencil attachment where the prepass will be rendered to. + depth_stencil_attachment: Option>, +} + impl ViewNode for PrepassNode { type ViewQuery = ( &'static ExtractedCamera, @@ -46,26 +57,7 @@ impl ViewNode for PrepassNode { ) -> Result<(), NodeRunError> { let diagnostics = render_context.diagnostic_recorder(); - let mut color_attachments = vec![ - view_prepass_textures - .normal - .as_ref() - .map(|normals_texture| normals_texture.get_attachment()), - view_prepass_textures - .motion_vectors - .as_ref() - .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), - // Use None in place of deferred attachments - None, - None, - ]; - - // If all color attachments are none: clear the color attachment list so that no fragment shader is required - if color_attachments.iter().all(Option::is_none) { - color_attachments.clear(); - } - - let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store)); + let prepass_runner = PrepassRunner::new(view_depth_texture, Some(view_prepass_textures)); let view_entity = graph.view_entity(); render_context.add_command_buffer_generation_task(move |render_device| { @@ -78,40 +70,17 @@ impl ViewNode for PrepassNode { label: Some("prepass_command_encoder"), }); - // Render pass setup - let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { - label: Some("prepass"), - color_attachments: &color_attachments, - depth_stencil_attachment, - timestamp_writes: None, - occlusion_query_set: None, - }); - - let mut render_pass = TrackedRenderPass::new(&render_device, render_pass); - let pass_span = diagnostics.pass_span(&mut render_pass, "prepass"); - - if let Some(viewport) = camera.viewport.as_ref() { - render_pass.set_camera_viewport(viewport); - } - - // Opaque draws - if !opaque_prepass_phase.batchable_keys.is_empty() - || !opaque_prepass_phase.unbatchable_keys.is_empty() - { - #[cfg(feature = "trace")] - let _opaque_prepass_span = info_span!("opaque_prepass").entered(); - opaque_prepass_phase.render(&mut render_pass, world, view_entity); - } - - // Alpha masked draws - if !alpha_mask_prepass_phase.is_empty() { - #[cfg(feature = "trace")] - let _alpha_mask_prepass_span = info_span!("alpha_mask_prepass").entered(); - alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity); - } - - pass_span.end(&mut render_pass); - drop(render_pass); + prepass_runner.run_prepass( + world, + &render_device, + diagnostics, + &mut command_encoder, + view_entity, + camera, + opaque_prepass_phase, + alpha_mask_prepass_phase, + "prepass", + ); // Copy prepass depth to the main depth texture if deferred isn't going to if deferred_prepass.is_none() { @@ -130,3 +99,85 @@ impl ViewNode for PrepassNode { Ok(()) } } + +impl<'a> PrepassRunner<'a> { + /// Creates a new [`PrepassRunner`] with the given depth texture. + pub(crate) fn new( + view_depth_texture: &'a ViewDepthTexture, + view_prepass_textures: Option<&'a ViewPrepassTextures>, + ) -> Self { + let mut color_attachments = vec![ + view_prepass_textures + .and_then(|view_prepass_textures| view_prepass_textures.normal.as_ref()) + .map(|normals_texture| normals_texture.get_attachment()), + view_prepass_textures + .and_then(|view_prepass_textures| view_prepass_textures.motion_vectors.as_ref()) + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + // Use None in place of deferred attachments + None, + None, + ]; + + // If all color attachments are none: clear the color attachment list so + // that no fragment shader is required + if color_attachments.iter().all(Option::is_none) { + color_attachments.clear(); + } + + let depth_stencil_attachment = Some(view_depth_texture.get_attachment(StoreOp::Store)); + + Self { + color_attachments, + depth_stencil_attachment, + } + } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn run_prepass( + self, + world: &World, + render_device: &RenderDevice, + diagnostics: impl RecordDiagnostics, + command_encoder: &mut CommandEncoder, + view_entity: Entity, + camera: &ExtractedCamera, + opaque_prepass_phase: &BinnedRenderPhase, + alpha_mask_prepass_phase: &BinnedRenderPhase, + label: &'static str, + ) { + // Render pass setup + let render_pass = command_encoder.begin_render_pass(&RenderPassDescriptor { + label: Some(label), + color_attachments: &self.color_attachments, + depth_stencil_attachment: self.depth_stencil_attachment, + timestamp_writes: None, + occlusion_query_set: None, + }); + + let mut render_pass = TrackedRenderPass::new(render_device, render_pass); + let pass_span = diagnostics.pass_span(&mut render_pass, label); + + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + // Opaque draws + if !opaque_prepass_phase.batchable_keys.is_empty() + || !opaque_prepass_phase.unbatchable_keys.is_empty() + { + #[cfg(feature = "trace")] + let _opaque_prepass_span = info_span!("opaque_prepass").entered(); + opaque_prepass_phase.render(&mut render_pass, world, view_entity); + } + + // Alpha masked draws + if !alpha_mask_prepass_phase.is_empty() { + #[cfg(feature = "trace")] + let _alpha_mask_prepass_span = info_span!("alpha_mask_prepass").entered(); + alpha_mask_prepass_phase.render(&mut render_pass, world, view_entity); + } + + pass_span.end(&mut render_pass); + drop(render_pass); + } +} diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index 4951d06eecf912..6cb4d308a22845 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -10,6 +10,7 @@ use bevy_core_pipeline::{ AlphaMask3d, Camera3d, Opaque3d, Opaque3dBinKey, ScreenSpaceTransmissionQuality, Transmissive3d, Transparent3d, }, + culling::HierarchicalDepthBuffer, prepass::{ DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass, OpaqueNoLightmap3dBinKey, }, @@ -539,6 +540,7 @@ pub fn queue_material_meshes( Has, Has, Has, + Has, ), Option<&Camera3d>, Has, @@ -562,7 +564,13 @@ pub fn queue_material_meshes( dither, shadow_filter_method, ssao, - (normal_prepass, depth_prepass, motion_vector_prepass, deferred_prepass), + ( + normal_prepass, + depth_prepass, + motion_vector_prepass, + deferred_prepass, + hierarchical_depth_buffer, + ), camera_3d, temporal_jitter, projection, @@ -585,7 +593,7 @@ pub fn queue_material_meshes( view_key |= MeshPipelineKey::NORMAL_PREPASS; } - if depth_prepass { + if depth_prepass || hierarchical_depth_buffer { view_key |= MeshPipelineKey::DEPTH_PREPASS; } diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index 9944f853d3f857..e17360cfe2d07e 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -21,7 +21,7 @@ use bevy_render::{ MainWorld, }; use bevy_transform::components::GlobalTransform; -use bevy_utils::{default, HashMap, HashSet}; +use bevy_utils::{default, previous_power_of_2, HashMap, HashSet}; use encase::internal::WriteInto; use std::{ iter, @@ -965,13 +965,3 @@ pub struct MeshletViewBindGroups { pub copy_material_depth: Option, pub material_draw: Option, } - -fn previous_power_of_2(x: u32) -> u32 { - // If x is a power of 2, halve it - if x.count_ones() == 1 { - x / 2 - } else { - // Else calculate the largest power of 2 that is less than x - 1 << (31 - x.leading_zeros()) - } -} diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index e43ab46b05e86d..279546ff725927 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -1,5 +1,6 @@ mod prepass_bindings; +use bevy_core_pipeline::culling::HierarchicalDepthBuffer; use bevy_render::batching::{batch_and_prepare_binned_render_phase, sort_binned_render_phase}; use bevy_render::mesh::MeshVertexBufferLayoutRef; use bevy_render::render_resource::binding_types::uniform_buffer; @@ -727,10 +728,11 @@ pub fn queue_prepass_material_meshes( Option<&mut BinnedRenderPhase>, Option<&mut BinnedRenderPhase>, Option<&mut BinnedRenderPhase>, - Option<&DepthPrepass>, - Option<&NormalPrepass>, - Option<&MotionVectorPrepass>, + Has, + Has, + Has, Option<&DeferredPrepass>, + Has, ), Or<( With>, @@ -769,16 +771,17 @@ pub fn queue_prepass_material_meshes( normal_prepass, motion_vector_prepass, deferred_prepass, + hierarchical_depth_buffer, ) in &mut views { let mut view_key = MeshPipelineKey::from_msaa_samples(msaa.samples()); - if depth_prepass.is_some() { + if depth_prepass || hierarchical_depth_buffer { view_key |= MeshPipelineKey::DEPTH_PREPASS; } - if normal_prepass.is_some() { + if normal_prepass { view_key |= MeshPipelineKey::NORMAL_PREPASS; } - if motion_vector_prepass.is_some() { + if motion_vector_prepass { view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS; } diff --git a/crates/bevy_utils/src/lib.rs b/crates/bevy_utils/src/lib.rs index 04325b64fe8054..86e68fb51ff4f4 100644 --- a/crates/bevy_utils/src/lib.rs +++ b/crates/bevy_utils/src/lib.rs @@ -395,6 +395,17 @@ impl std::hash::Hasher for NoOpHasher { } } +/// Returns the previous power of two of a `u32`. +pub fn previous_power_of_2(x: u32) -> u32 { + // If x is a power of 2, halve it + if x.count_ones() == 1 { + x / 2 + } else { + // Else calculate the largest power of 2 that is less than x + 1 << (31 - x.leading_zeros()) + } +} + /// A type which calls a function when dropped. /// This can be used to ensure that cleanup code is run even in case of a panic. /// diff --git a/examples/3d/3d_shapes.rs b/examples/3d/3d_shapes.rs index f0aaa9265bb5ce..40dae0604b51c3 100644 --- a/examples/3d/3d_shapes.rs +++ b/examples/3d/3d_shapes.rs @@ -5,6 +5,7 @@ use std::f32::consts::PI; use bevy::{ color::palettes::basic::SILVER, + core_pipeline::culling::HierarchicalDepthBuffer, prelude::*, render::{ render_asset::RenderAssetUsages, @@ -83,10 +84,14 @@ fn setup( ..default() }); - commands.spawn(Camera3dBundle { - transform: Transform::from_xyz(0.0, 6., 12.0).looking_at(Vec3::new(0., 1., 0.), Vec3::Y), - ..default() - }); + commands + .spawn(Camera3dBundle { + transform: Transform::from_xyz(0.0, 6., 12.0) + .looking_at(Vec3::new(0., 1., 0.), Vec3::Y), + ..default() + }) + //.insert(DepthPrepass) + .insert(HierarchicalDepthBuffer); } fn rotate(mut query: Query<&mut Transform, With>, time: Res