diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs b/crates/bevy_core_pipeline/src/core_2d/mod.rs
index 48d2bb5a4f609..85f986c111dd4 100644
--- a/crates/bevy_core_pipeline/src/core_2d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs
@@ -38,12 +38,11 @@ use bevy_render::{
     render_graph::{EmptyNode, RenderGraphApp, ViewNodeRunner},
     render_phase::{
         sort_phase_system, CachedRenderPipelinePhaseItem, DrawFunctionId, DrawFunctions, PhaseItem,
-        SortedPhaseItem, SortedRenderPhase,
+        PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase,
     },
     render_resource::CachedRenderPipelineId,
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
-use nonmax::NonMaxU32;
 
 use crate::{tonemapping::TonemappingNode, upscaling::UpscalingNode};
 
@@ -91,7 +90,7 @@ pub struct Transparent2d {
     pub pipeline: CachedRenderPipelineId,
     pub draw_function: DrawFunctionId,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 impl PhaseItem for Transparent2d {
@@ -116,13 +115,13 @@ impl PhaseItem for Transparent2d {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs
index 94bc10b4f3501..e5cd6c049173f 100644
--- a/crates/bevy_core_pipeline/src/core_3d/mod.rs
+++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs
@@ -58,7 +58,8 @@ use bevy_render::{
     render_graph::{EmptyNode, RenderGraphApp, ViewNodeRunner},
     render_phase::{
         sort_phase_system, BinnedPhaseItem, BinnedRenderPhase, CachedRenderPipelinePhaseItem,
-        DrawFunctionId, DrawFunctions, PhaseItem, SortedPhaseItem, SortedRenderPhase,
+        DrawFunctionId, DrawFunctions, PhaseItem, PhaseItemExtraIndex, SortedPhaseItem,
+        SortedRenderPhase,
     },
     render_resource::{
         BindGroupId, CachedRenderPipelineId, Extent3d, FilterMode, Sampler, SamplerDescriptor,
@@ -70,7 +71,6 @@ use bevy_render::{
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
 use bevy_utils::{tracing::warn, HashMap};
-use nonmax::NonMaxU32;
 
 use crate::{
     core_3d::main_transmissive_pass_3d_node::MainTransmissivePass3dNode,
@@ -183,8 +183,9 @@ pub struct Opaque3d {
     pub representative_entity: Entity,
     /// The ranges of instances.
     pub batch_range: Range<u32>,
-    /// The dynamic offset.
-    pub dynamic_offset: Option<NonMaxU32>,
+    /// An extra index, which is either a dynamic offset or an index in the
+    /// indirect parameters list.
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 /// Data that must be identical in order to batch meshes together.
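With this change, a phase item stores a single `PhaseItemExtraIndex` instead of `Option<NonMaxU32>`, and the pair of `dynamic_offset` accessors is replaced by one getter plus a combined mutable accessor, so that batching code can borrow the batch range and the extra index at the same time. A minimal sketch of what the migration looks like for a hypothetical custom sorted item (`MyPhaseItem` is illustrative, not part of this PR; the `SortedPhaseItem` and draw-function plumbing are omitted):

```rust
use std::ops::Range;

use bevy_ecs::entity::Entity;
use bevy_render::render_phase::{DrawFunctionId, PhaseItem, PhaseItemExtraIndex};

pub struct MyPhaseItem {
    pub entity: Entity,
    pub draw_function: DrawFunctionId,
    pub batch_range: Range<u32>,
    // Previously: `pub dynamic_offset: Option<NonMaxU32>`.
    pub extra_index: PhaseItemExtraIndex,
}

impl PhaseItem for MyPhaseItem {
    fn entity(&self) -> Entity {
        self.entity
    }
    fn draw_function(&self) -> DrawFunctionId {
        self.draw_function
    }
    fn batch_range(&self) -> &Range<u32> {
        &self.batch_range
    }
    fn batch_range_mut(&mut self) -> &mut Range<u32> {
        &mut self.batch_range
    }
    // `extra_index` replaces `dynamic_offset`, and the combined accessor
    // replaces `dynamic_offset_mut`.
    fn extra_index(&self) -> PhaseItemExtraIndex {
        self.extra_index
    }
    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
        (&mut self.batch_range, &mut self.extra_index)
    }
}
```

When queueing, `extra_index: PhaseItemExtraIndex::NONE` takes the place of the old `dynamic_offset: None`, as the gizmo and material changes further down show.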
@@ -229,14 +230,12 @@ impl PhaseItem for Opaque3d {
         &mut self.batch_range
     }
 
-    #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
-    #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -248,13 +247,13 @@ impl BinnedPhaseItem for Opaque3d {
         key: Self::BinKey,
         representative_entity: Entity,
         batch_range: Range<u32>,
-        dynamic_offset: Option<NonMaxU32>,
+        extra_index: PhaseItemExtraIndex,
     ) -> Self {
         Opaque3d {
             key,
             representative_entity,
             batch_range,
-            dynamic_offset,
+            extra_index,
         }
     }
 }
@@ -270,7 +269,7 @@ pub struct AlphaMask3d {
     pub key: OpaqueNoLightmap3dBinKey,
     pub representative_entity: Entity,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 impl PhaseItem for AlphaMask3d {
@@ -295,13 +294,13 @@ impl PhaseItem for AlphaMask3d {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -313,13 +312,13 @@ impl BinnedPhaseItem for AlphaMask3d {
         key: Self::BinKey,
         representative_entity: Entity,
         batch_range: Range<u32>,
-        dynamic_offset: Option<NonMaxU32>,
+        extra_index: PhaseItemExtraIndex,
     ) -> Self {
         Self {
             key,
             representative_entity,
             batch_range,
-            dynamic_offset,
+            extra_index,
         }
     }
 }
@@ -337,7 +336,7 @@ pub struct Transmissive3d {
     pub entity: Entity,
     pub draw_function: DrawFunctionId,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 impl PhaseItem for Transmissive3d {
@@ -373,13 +372,13 @@ impl PhaseItem for Transmissive3d {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -411,7 +410,7 @@ pub struct Transparent3d {
     pub entity: Entity,
     pub draw_function: DrawFunctionId,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 impl PhaseItem for Transparent3d {
@@ -436,13 +435,13 @@ impl PhaseItem for Transparent3d {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
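Binned phase items (`Opaque3d`, `AlphaMask3d`, and the deferred, prepass, and shadow items below) are constructed lazily by the batcher from their bin keys, which is why `BinnedPhaseItem::new` takes the batch range and extra index as parameters; this PR threads `PhaseItemExtraIndex` through that constructor. A sketch of the updated contract for a hypothetical item (`MyBinnedItem` and `MyBinKey` are illustrative; the accompanying `PhaseItem` impl, like the one sketched earlier, is assumed):

```rust
use std::ops::Range;

use bevy_ecs::entity::Entity;
use bevy_render::render_phase::{BinnedPhaseItem, PhaseItemExtraIndex};

#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct MyBinKey;

struct MyBinnedItem {
    key: MyBinKey,
    representative_entity: Entity,
    batch_range: Range<u32>,
    extra_index: PhaseItemExtraIndex,
}

impl BinnedPhaseItem for MyBinnedItem {
    type BinKey = MyBinKey;

    // The batcher calls this when it turns a bin into drawable items, passing
    // along whichever extra index (dynamic offset or indirect-parameters
    // index) it computed for the batch.
    fn new(
        key: Self::BinKey,
        representative_entity: Entity,
        batch_range: Range<u32>,
        extra_index: PhaseItemExtraIndex,
    ) -> Self {
        Self {
            key,
            representative_entity,
            batch_range,
            extra_index,
        }
    }
}
```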
diff --git a/crates/bevy_core_pipeline/src/deferred/mod.rs b/crates/bevy_core_pipeline/src/deferred/mod.rs
index 3ccd8caad0e12..1372731820224 100644
--- a/crates/bevy_core_pipeline/src/deferred/mod.rs
+++ b/crates/bevy_core_pipeline/src/deferred/mod.rs
@@ -5,10 +5,12 @@ use std::ops::Range;
 
 use bevy_ecs::prelude::*;
 use bevy_render::{
-    render_phase::{BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem},
+    render_phase::{
+        BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem,
+        PhaseItemExtraIndex,
+    },
     render_resource::{CachedRenderPipelineId, TextureFormat},
 };
-use nonmax::NonMaxU32;
 
 use crate::prepass::OpaqueNoLightmap3dBinKey;
 
@@ -26,7 +28,7 @@ pub struct Opaque3dDeferred {
     pub key: OpaqueNoLightmap3dBinKey,
     pub representative_entity: Entity,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 impl PhaseItem for Opaque3dDeferred {
@@ -51,13 +53,13 @@ impl PhaseItem for Opaque3dDeferred {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -69,13 +71,13 @@ impl BinnedPhaseItem for Opaque3dDeferred {
         key: Self::BinKey,
         representative_entity: Entity,
         batch_range: Range<u32>,
-        dynamic_offset: Option<NonMaxU32>,
+        extra_index: PhaseItemExtraIndex,
     ) -> Self {
         Self {
             key,
             representative_entity,
             batch_range,
-            dynamic_offset,
+            extra_index,
         }
     }
 }
@@ -96,7 +98,7 @@ pub struct AlphaMask3dDeferred {
     pub key: OpaqueNoLightmap3dBinKey,
     pub representative_entity: Entity,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 impl PhaseItem for AlphaMask3dDeferred {
@@ -121,13 +123,13 @@ impl PhaseItem for AlphaMask3dDeferred {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -138,13 +140,13 @@ impl BinnedPhaseItem for AlphaMask3dDeferred {
         key: Self::BinKey,
         representative_entity: Entity,
         batch_range: Range<u32>,
-        dynamic_offset: Option<NonMaxU32>,
+        extra_index: PhaseItemExtraIndex,
     ) -> Self {
         Self {
             key,
             representative_entity,
             batch_range,
-            dynamic_offset,
+            extra_index,
         }
     }
 }
diff --git a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs
index 670eca9e8e2d2..88a6ac3970539 100644
--- a/crates/bevy_core_pipeline/src/prepass/mod.rs
+++ b/crates/bevy_core_pipeline/src/prepass/mod.rs
@@ -34,11 +34,13 @@ use bevy_ecs::prelude::*;
 use bevy_reflect::Reflect;
 use bevy_render::{
     mesh::Mesh,
-    render_phase::{BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem},
+    render_phase::{
+        BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem,
+        PhaseItemExtraIndex,
+    },
     render_resource::{BindGroupId, CachedRenderPipelineId, Extent3d, TextureFormat, TextureView},
     texture::ColorAttachment,
 };
-use nonmax::NonMaxU32;
 
 pub const NORMAL_PREPASS_FORMAT: TextureFormat = TextureFormat::Rgb10a2Unorm;
 pub const MOTION_VECTOR_PREPASS_FORMAT: TextureFormat = TextureFormat::Rg16Float;
@@ -119,7 +121,7 @@ pub struct Opaque3dPrepass {
     pub representative_entity: Entity,
     pub batch_range: Range<u32>,
 
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 // TODO: Try interning these.
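The surrounding hunks use three members of `PhaseItemExtraIndex` (`NONE`, `as_dynamic_offset`, and `as_indirect_parameters_index`), but the type's own definition sits outside this diff. A plausible minimal encoding that is consistent with those call sites, shown purely for orientation (the real definition lives in `bevy_render::render_phase` and may differ in representation):

```rust
use nonmax::NonMaxU32;

/// Illustrative only: one `u32` that can mean "nothing", "a dynamic uniform
/// offset", or "an index into the indirect parameters buffer".
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct PhaseItemExtraIndex(pub u32);

impl PhaseItemExtraIndex {
    /// Assumed flag bit: when set, the low bits are an indirect-parameters index.
    const INDIRECT_FLAG: u32 = 1 << 31;
    /// No extra index at all.
    pub const NONE: Self = Self(u32::MAX);

    pub fn as_dynamic_offset(self) -> Option<NonMaxU32> {
        if self == Self::NONE || self.0 & Self::INDIRECT_FLAG != 0 {
            None
        } else {
            NonMaxU32::new(self.0)
        }
    }

    pub fn as_indirect_parameters_index(self) -> Option<u32> {
        if self != Self::NONE && self.0 & Self::INDIRECT_FLAG != 0 {
            Some(self.0 & !Self::INDIRECT_FLAG)
        } else {
            None
        }
    }
}
```

The key property the rest of the diff relies on is that the two meanings are mutually exclusive, so `SetMeshBindGroup` and `DrawMesh` can each ask only for the interpretation they care about.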
@@ -163,13 +165,13 @@ impl PhaseItem for Opaque3dPrepass {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -181,13 +183,13 @@ impl BinnedPhaseItem for Opaque3dPrepass {
         key: Self::BinKey,
         representative_entity: Entity,
         batch_range: Range<u32>,
-        dynamic_offset: Option<NonMaxU32>,
+        extra_index: PhaseItemExtraIndex,
     ) -> Self {
         Opaque3dPrepass {
             key,
             representative_entity,
             batch_range,
-            dynamic_offset,
+            extra_index,
         }
     }
 }
@@ -208,7 +210,7 @@ pub struct AlphaMask3dPrepass {
     pub key: OpaqueNoLightmap3dBinKey,
     pub representative_entity: Entity,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 impl PhaseItem for AlphaMask3dPrepass {
@@ -233,13 +235,13 @@ impl PhaseItem for AlphaMask3dPrepass {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -251,13 +253,13 @@ impl BinnedPhaseItem for AlphaMask3dPrepass {
         key: Self::BinKey,
         representative_entity: Entity,
         batch_range: Range<u32>,
-        dynamic_offset: Option<NonMaxU32>,
+        extra_index: PhaseItemExtraIndex,
     ) -> Self {
         Self {
             key,
             representative_entity,
             batch_range,
-            dynamic_offset,
+            extra_index,
         }
     }
 }
diff --git a/crates/bevy_gizmos/src/pipeline_2d.rs b/crates/bevy_gizmos/src/pipeline_2d.rs
index 4fbf9544e22b5..660cec02c92d3 100644
--- a/crates/bevy_gizmos/src/pipeline_2d.rs
+++ b/crates/bevy_gizmos/src/pipeline_2d.rs
@@ -18,7 +18,9 @@ use bevy_ecs::{
 use bevy_math::FloatOrd;
 use bevy_render::{
     render_asset::{prepare_assets, RenderAssets},
-    render_phase::{AddRenderCommand, DrawFunctions, SetItemPipeline, SortedRenderPhase},
+    render_phase::{
+        AddRenderCommand, DrawFunctions, PhaseItemExtraIndex, SetItemPipeline, SortedRenderPhase,
+    },
     render_resource::*,
     texture::BevyDefault,
     view::{ExtractedView, Msaa, RenderLayers, ViewTarget},
@@ -293,7 +295,7 @@ fn queue_line_gizmos_2d(
                 pipeline,
                 sort_key: FloatOrd(f32::INFINITY),
                 batch_range: 0..1,
-                dynamic_offset: None,
+                extra_index: PhaseItemExtraIndex::NONE,
             });
         }
     }
@@ -351,7 +353,7 @@ fn queue_line_joint_gizmos_2d(
                 pipeline,
                 sort_key: FloatOrd(f32::INFINITY),
                 batch_range: 0..1,
-                dynamic_offset: None,
+                extra_index: PhaseItemExtraIndex::NONE,
             });
         }
     }
diff --git a/crates/bevy_gizmos/src/pipeline_3d.rs b/crates/bevy_gizmos/src/pipeline_3d.rs
index ec9800330d1b7..e247220d541bc 100644
--- a/crates/bevy_gizmos/src/pipeline_3d.rs
+++ b/crates/bevy_gizmos/src/pipeline_3d.rs
@@ -22,7 +22,9 @@ use bevy_ecs::{
 use bevy_pbr::{MeshPipeline, MeshPipelineKey, SetMeshViewBindGroup};
 use bevy_render::{
     render_asset::{prepare_assets, RenderAssets},
-    render_phase::{AddRenderCommand, DrawFunctions, SetItemPipeline, SortedRenderPhase},
+    render_phase::{
+        AddRenderCommand, DrawFunctions, PhaseItemExtraIndex, SetItemPipeline, SortedRenderPhase,
+    },
     render_resource::*,
     texture::BevyDefault,
     view::{ExtractedView, Msaa, RenderLayers, ViewTarget},
@@ -348,7 +350,7 @@ fn queue_line_gizmos_3d(
                 pipeline,
                 distance: 0.,
                 batch_range: 0..1,
-                dynamic_offset: None,
+                extra_index: PhaseItemExtraIndex::NONE,
             });
         }
     }
@@ -437,7 +439,7 @@ fn queue_line_joint_gizmos_3d(
                 pipeline,
                 distance: 0.,
                 batch_range: 0..1,
-                dynamic_offset: None,
+                extra_index: PhaseItemExtraIndex::NONE,
             });
         }
     }
diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index 395cb1c6e1746..1309934a1e3b0 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -710,7 +710,7 @@ pub fn queue_material_meshes<M: Material>(
                         pipeline: pipeline_id,
                         distance,
                         batch_range: 0..1,
-                        dynamic_offset: None,
+                        extra_index: PhaseItemExtraIndex::NONE,
                     });
                 } else if material.properties.render_method == OpaqueRendererMethod::Forward {
                     let bin_key = Opaque3dBinKey {
@@ -734,7 +734,7 @@ pub fn queue_material_meshes<M: Material>(
                         pipeline: pipeline_id,
                         distance,
                         batch_range: 0..1,
-                        dynamic_offset: None,
+                        extra_index: PhaseItemExtraIndex::NONE,
                     });
                 } else if material.properties.render_method == OpaqueRendererMethod::Forward {
                     let bin_key = OpaqueNoLightmap3dBinKey {
@@ -759,7 +759,7 @@ pub fn queue_material_meshes<M: Material>(
                         pipeline: pipeline_id,
                         distance,
                         batch_range: 0..1,
-                        dynamic_offset: None,
+                        extra_index: PhaseItemExtraIndex::NONE,
                     });
                 }
             }
diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs
index 21eff19668a65..8e32e96678ad6 100644
--- a/crates/bevy_pbr/src/render/gpu_preprocess.rs
+++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs
@@ -14,27 +14,35 @@ use bevy_core_pipeline::core_3d::graph::{Core3d, Node3d};
 use bevy_ecs::{
     component::Component,
     entity::Entity,
-    query::QueryState,
+    query::{Has, QueryState},
     schedule::{common_conditions::resource_exists, IntoSystemConfigs as _},
     system::{lifetimeless::Read, Commands, Res, ResMut, Resource},
     world::{FromWorld, World},
 };
 use bevy_render::{
-    batching::gpu_preprocessing::{self, BatchedInstanceBuffers, PreprocessWorkItem},
+    batching::gpu_preprocessing::{
+        BatchedInstanceBuffers, GpuPreprocessingSupport, IndirectParameters,
+        IndirectParametersBuffer, PreprocessWorkItem,
+    },
     render_graph::{Node, NodeRunError, RenderGraphApp, RenderGraphContext},
     render_resource::{
-        binding_types::{storage_buffer, storage_buffer_read_only},
+        binding_types::{storage_buffer, storage_buffer_read_only, uniform_buffer},
         BindGroup, BindGroupEntries, BindGroupLayout, BindingResource, BufferBinding,
         CachedComputePipelineId, ComputePassDescriptor, ComputePipelineDescriptor,
         DynamicBindGroupLayoutEntries, PipelineCache, Shader, ShaderStages, ShaderType,
         SpecializedComputePipeline, SpecializedComputePipelines,
     },
-    renderer::{RenderContext, RenderDevice},
+    renderer::{RenderContext, RenderDevice, RenderQueue},
+    view::{GpuCulling, ViewUniform, ViewUniformOffset, ViewUniforms},
     Render, RenderApp, RenderSet,
 };
 use bevy_utils::tracing::warn;
+use bitflags::bitflags;
+use smallvec::{smallvec, SmallVec};
 
-use crate::{graph::NodePbr, MeshInputUniform, MeshUniform};
+use crate::{
+    graph::NodePbr, MeshCullingData, MeshCullingDataBuffer, MeshInputUniform, MeshUniform,
+};
 
 /// The handle to the `mesh_preprocess.wgsl` compute shader.
 pub const MESH_PREPROCESS_SHADER_HANDLE: Handle<Shader> =
@@ -57,20 +65,46 @@ pub struct GpuMeshPreprocessPlugin {
 
 /// The render node for the mesh uniform building pass.
 pub struct GpuPreprocessNode {
-    view_query: QueryState<(Entity, Read<PreprocessBindGroup>)>,
+    view_query: QueryState<(
+        Entity,
+        Read<PreprocessBindGroup>,
+        Read<ViewUniformOffset>,
+        Has<GpuCulling>,
+    )>,
 }
 
-/// The compute shader pipeline for the mesh uniform building pass.
+/// The compute shader pipelines for the mesh uniform building pass.
 #[derive(Resource)]
+pub struct PreprocessPipelines {
+    /// The pipeline used for CPU culling. This pipeline doesn't populate
+    /// indirect parameters.
+    pub direct: PreprocessPipeline,
+    /// The pipeline used for GPU culling. This pipeline populates indirect
+    /// parameters.
+    pub gpu_culling: PreprocessPipeline,
+}
+
+/// The pipeline for the GPU mesh preprocessing shader.
 pub struct PreprocessPipeline {
-    /// The single bind group layout for the compute shader.
+    /// The bind group layout for the compute shader.
     pub bind_group_layout: BindGroupLayout,
     /// The pipeline ID for the compute shader.
    ///
-    /// This gets filled in in `prepare_preprocess_pipeline`.
+    /// This gets filled in in `prepare_preprocess_pipelines`.
     pub pipeline_id: Option<CachedComputePipelineId>,
 }
 
+bitflags! {
+    /// Specifies variants of the mesh preprocessing shader.
+    #[derive(Clone, Copy, PartialEq, Eq, Hash)]
+    pub struct PreprocessPipelineKey: u8 {
+        /// Whether GPU culling is in use.
+        ///
+        /// This `#define`s `INDIRECT` and `FRUSTUM_CULLING` in the shader.
+        const GPU_CULLING = 1;
+    }
+}
+
 /// The compute shader bind group for the mesh uniform building pass.
 ///
 /// This goes on the view.
@@ -94,9 +128,9 @@ impl Plugin for GpuMeshPreprocessPlugin {
 
         // This plugin does nothing if GPU instance buffer building isn't in
         // use.
-        let render_device = render_app.world().resource::<RenderDevice>();
+        let gpu_preprocessing_support = render_app.world().resource::<GpuPreprocessingSupport>();
         if !self.use_gpu_instance_buffer_builder
-            || !gpu_preprocessing::can_preprocess_on_gpu(render_device)
+            || *gpu_preprocessing_support == GpuPreprocessingSupport::None
         {
             return;
         }
@@ -106,17 +140,18 @@ impl Plugin for GpuMeshPreprocessPlugin {
             .add_render_graph_node::<GpuPreprocessNode>(Core3d, NodePbr::GpuPreprocess)
             .add_render_graph_edges(Core3d, (NodePbr::GpuPreprocess, Node3d::Prepass))
             .add_render_graph_edges(Core3d, (NodePbr::GpuPreprocess, NodePbr::ShadowPass))
-            .init_resource::<PreprocessPipeline>()
+            .init_resource::<PreprocessPipelines>()
             .init_resource::<SpecializedComputePipelines<PreprocessPipeline>>()
             .add_systems(
                 Render,
                 (
-                    prepare_preprocess_pipeline.in_set(RenderSet::Prepare),
+                    prepare_preprocess_pipelines.in_set(RenderSet::Prepare),
                     prepare_preprocess_bind_groups
                         .run_if(
                             resource_exists::<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>,
                        )
                         .in_set(RenderSet::PrepareBindGroups),
+                    write_mesh_culling_data_buffer.in_set(RenderSet::PrepareResourcesFlush),
                 ),
             );
     }
@@ -148,18 +183,7 @@ impl Node for GpuPreprocessNode {
         } = world.resource::<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>();
 
         let pipeline_cache = world.resource::<PipelineCache>();
-        let preprocess_pipeline = world.resource::<PreprocessPipeline>();
-
-        let Some(preprocess_pipeline_id) = preprocess_pipeline.pipeline_id else {
-            warn!("The build mesh uniforms pipeline wasn't created");
-            return Ok(());
-        };
-
-        let Some(preprocess_pipeline) = pipeline_cache.get_compute_pipeline(preprocess_pipeline_id)
-        else {
-            // This will happen while the pipeline is being compiled and is fine.
-            return Ok(());
-        };
+        let preprocess_pipelines = world.resource::<PreprocessPipelines>();
 
         let mut compute_pass = render_context
@@ -169,13 +193,46 @@ impl Node for GpuPreprocessNode {
                 timestamp_writes: None,
             });
 
-        compute_pass.set_pipeline(preprocess_pipeline);
-
         // Run the compute passes.
-        for (view, bind_group) in self.view_query.iter_manual(world) {
-            let index_buffer = &index_buffers[&view];
-            compute_pass.set_bind_group(0, &bind_group.0, &[]);
-            let workgroup_count = index_buffer.len().div_ceil(WORKGROUP_SIZE);
+        for (view, bind_group, view_uniform_offset, gpu_culling) in
+            self.view_query.iter_manual(world)
+        {
+            // Grab the index buffer for this view.
+            let Some(index_buffer) = index_buffers.get(&view) else {
+                warn!("The preprocessing index buffer wasn't present");
+                return Ok(());
+            };
+
+            // Select the right pipeline, depending on whether GPU culling is in
+            // use.
+            let maybe_pipeline_id = if gpu_culling {
+                preprocess_pipelines.gpu_culling.pipeline_id
+            } else {
+                preprocess_pipelines.direct.pipeline_id
+            };
+
+            // Fetch the pipeline.
+            let Some(preprocess_pipeline_id) = maybe_pipeline_id else {
+                warn!("The build mesh uniforms pipeline wasn't ready");
+                return Ok(());
+            };
+
+            let Some(preprocess_pipeline) =
+                pipeline_cache.get_compute_pipeline(preprocess_pipeline_id)
+            else {
+                // This will happen while the pipeline is being compiled and is fine.
+                return Ok(());
+            };
+
+            compute_pass.set_pipeline(preprocess_pipeline);
+
+            let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![];
+            if gpu_culling {
+                dynamic_offsets.push(view_uniform_offset.offset);
+            }
+            compute_pass.set_bind_group(0, &bind_group.0, &dynamic_offsets);
+
+            let workgroup_count = index_buffer.buffer.len().div_ceil(WORKGROUP_SIZE);
             compute_pass.dispatch_workgroups(workgroup_count as u32, 1, 1);
         }
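The dispatch above rounds the work-item count up to whole workgroups, since `@workgroup_size(64)` threads run per group whether or not each one has an item; the shader's `arrayLength` bounds check (visible in `mesh_preprocess.wgsl` further down) discards the excess threads. A worked example of the arithmetic:

```rust
const WORKGROUP_SIZE: usize = 64;

/// One compute thread per `PreprocessWorkItem`, rounded up.
fn workgroup_count(num_work_items: usize) -> u32 {
    num_work_items.div_ceil(WORKGROUP_SIZE) as u32
}

#[test]
fn rounds_up_to_whole_workgroups() {
    assert_eq!(workgroup_count(1), 1); // a partial group still dispatches
    assert_eq!(workgroup_count(64), 1);
    assert_eq!(workgroup_count(65), 2); // 64 threads, plus one more group for the rest
}
```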
@@ -183,72 +240,149 @@ impl Node for GpuPreprocessNode {
     }
 }
 
+impl PreprocessPipelines {
+    pub(crate) fn pipelines_are_loaded(&self, pipeline_cache: &PipelineCache) -> bool {
+        self.direct.is_loaded(pipeline_cache) && self.gpu_culling.is_loaded(pipeline_cache)
+    }
+}
+
+impl PreprocessPipeline {
+    fn is_loaded(&self, pipeline_cache: &PipelineCache) -> bool {
+        self.pipeline_id
+            .is_some_and(|pipeline_id| pipeline_cache.get_compute_pipeline(pipeline_id).is_some())
+    }
+}
+
 impl SpecializedComputePipeline for PreprocessPipeline {
-    type Key = ();
+    type Key = PreprocessPipelineKey;
+
+    fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor {
+        let mut shader_defs = vec![];
+        if key.contains(PreprocessPipelineKey::GPU_CULLING) {
+            shader_defs.push("INDIRECT".into());
+            shader_defs.push("FRUSTUM_CULLING".into());
+        }
 
-    fn specialize(&self, _: Self::Key) -> ComputePipelineDescriptor {
         ComputePipelineDescriptor {
-            label: Some("mesh preprocessing".into()),
+            label: Some(
+                format!(
+                    "mesh preprocessing ({})",
+                    if key.contains(PreprocessPipelineKey::GPU_CULLING) {
+                        "GPU culling"
+                    } else {
+                        "direct"
+                    }
+                )
+                .into(),
+            ),
             layout: vec![self.bind_group_layout.clone()],
             push_constant_ranges: vec![],
             shader: MESH_PREPROCESS_SHADER_HANDLE,
-            shader_defs: vec![],
+            shader_defs,
             entry_point: "main".into(),
         }
     }
 }
 
-impl FromWorld for PreprocessPipeline {
+impl FromWorld for PreprocessPipelines {
     fn from_world(world: &mut World) -> Self {
         let render_device = world.resource::<RenderDevice>();
 
-        let bind_group_layout_entries = DynamicBindGroupLayoutEntries::sequential(
-            ShaderStages::COMPUTE,
-            (
-                // `current_input`
-                storage_buffer_read_only::<MeshInputUniform>(false),
-                // `previous_input`
-                storage_buffer_read_only::<MeshInputUniform>(false),
-                // `indices`
-                storage_buffer_read_only::<PreprocessWorkItem>(false),
-                // `output`
-                storage_buffer::<MeshUniform>(false),
-            ),
+        // GPU culling bind group parameters are a superset of those in the CPU
+        // culling (direct) shader.
+        let direct_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries();
+        let gpu_culling_bind_group_layout_entries = preprocess_direct_bind_group_layout_entries()
+            .extend_sequential((
+                // `indirect_parameters`
+                storage_buffer::<IndirectParameters>(/*has_dynamic_offset=*/ false),
+                // `mesh_culling_data`
+                storage_buffer_read_only::<MeshCullingData>(/*has_dynamic_offset=*/ false),
+                // `view`
+                uniform_buffer::<ViewUniform>(/*has_dynamic_offset=*/ true),
+            ));
+
+        let direct_bind_group_layout = render_device.create_bind_group_layout(
+            "build mesh uniforms direct bind group layout",
+            &direct_bind_group_layout_entries,
         );
-
-        let bind_group_layout = render_device.create_bind_group_layout(
-            "build mesh uniforms bind group layout",
-            &bind_group_layout_entries,
+        let gpu_culling_bind_group_layout = render_device.create_bind_group_layout(
+            "build mesh uniforms GPU culling bind group layout",
+            &gpu_culling_bind_group_layout_entries,
         );
 
-        PreprocessPipeline {
-            bind_group_layout,
-            pipeline_id: None,
+        PreprocessPipelines {
+            direct: PreprocessPipeline {
+                bind_group_layout: direct_bind_group_layout,
+                pipeline_id: None,
+            },
+            gpu_culling: PreprocessPipeline {
+                bind_group_layout: gpu_culling_bind_group_layout,
+                pipeline_id: None,
+            },
         }
     }
 }
 
-/// A system that specializes the `mesh_preprocess.wgsl` pipeline if necessary.
-pub fn prepare_preprocess_pipeline(
+fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries {
+    DynamicBindGroupLayoutEntries::sequential(
+        ShaderStages::COMPUTE,
+        (
+            // `current_input`
+            storage_buffer_read_only::<MeshInputUniform>(false),
+            // `previous_input`
+            storage_buffer_read_only::<MeshInputUniform>(false),
+            // `indices`
+            storage_buffer_read_only::<PreprocessWorkItem>(false),
+            // `output`
+            storage_buffer::<MeshUniform>(false),
+        ),
+    )
+}
+
+/// A system that specializes the `mesh_preprocess.wgsl` pipelines if necessary.
+pub fn prepare_preprocess_pipelines(
     pipeline_cache: Res<PipelineCache>,
     mut pipelines: ResMut<SpecializedComputePipelines<PreprocessPipeline>>,
-    mut preprocess_pipeline: ResMut<PreprocessPipeline>,
+    mut preprocess_pipelines: ResMut<PreprocessPipelines>,
 ) {
-    if preprocess_pipeline.pipeline_id.is_some() {
-        return;
-    }
+    preprocess_pipelines.direct.prepare(
+        &pipeline_cache,
+        &mut pipelines,
+        PreprocessPipelineKey::empty(),
+    );
+    preprocess_pipelines.gpu_culling.prepare(
+        &pipeline_cache,
+        &mut pipelines,
+        PreprocessPipelineKey::GPU_CULLING,
+    );
+}
 
-    let preprocess_pipeline_id = pipelines.specialize(&pipeline_cache, &preprocess_pipeline, ());
-    preprocess_pipeline.pipeline_id = Some(preprocess_pipeline_id);
+impl PreprocessPipeline {
+    fn prepare(
+        &mut self,
+        pipeline_cache: &PipelineCache,
+        pipelines: &mut SpecializedComputePipelines<PreprocessPipeline>,
+        key: PreprocessPipelineKey,
+    ) {
+        if self.pipeline_id.is_some() {
+            return;
+        }
+
+        let preprocess_pipeline_id = pipelines.specialize(pipeline_cache, self, key);
+        self.pipeline_id = Some(preprocess_pipeline_id);
+    }
 }
 
-/// A system that attaches the mesh uniform buffers to the bind group for the
-/// compute shader.
+/// A system that attaches the mesh uniform buffers to the bind groups for the
+/// variants of the mesh preprocessing compute shader.
 pub fn prepare_preprocess_bind_groups(
     mut commands: Commands,
     render_device: Res<RenderDevice>,
     batched_instance_buffers: Res<BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>,
-    pipeline: Res<PreprocessPipeline>,
+    indirect_parameters_buffer: Res<IndirectParametersBuffer>,
+    mesh_culling_data_buffer: Res<MeshCullingDataBuffer>,
+    view_uniforms: Res<ViewUniforms>,
+    pipelines: Res<PreprocessPipelines>,
 ) {
     // Grab the `BatchedInstanceBuffers`.
     let BatchedInstanceBuffers {
@@ -267,7 +401,7 @@ pub fn prepare_preprocess_bind_groups(
     };
 
     for (view, index_buffer_vec) in index_buffers {
-        let Some(index_buffer) = index_buffer_vec.buffer() else {
+        let Some(index_buffer) = index_buffer_vec.buffer.buffer() else {
             continue;
         };
 
@@ -275,15 +409,27 @@ pub fn prepare_preprocess_bind_groups(
         // length and the underlying buffer may be longer than the actual size
         // of the vector.
         let index_buffer_size = NonZeroU64::try_from(
-            index_buffer_vec.len() as u64 * u64::from(PreprocessWorkItem::min_size()),
+            index_buffer_vec.buffer.len() as u64 * u64::from(PreprocessWorkItem::min_size()),
         )
         .ok();
 
-        commands
-            .entity(*view)
-            .insert(PreprocessBindGroup(render_device.create_bind_group(
-                "preprocess_bind_group",
-                &pipeline.bind_group_layout,
+        let bind_group = if index_buffer_vec.gpu_culling {
+            let (
+                Some(indirect_parameters_buffer),
+                Some(mesh_culling_data_buffer),
+                Some(view_uniforms_binding),
+            ) = (
+                indirect_parameters_buffer.buffer(),
+                mesh_culling_data_buffer.buffer(),
+                view_uniforms.uniforms.binding(),
+            )
+            else {
+                continue;
+            };
+
+            PreprocessBindGroup(render_device.create_bind_group(
+                "preprocess_gpu_culling_bind_group",
+                &pipelines.gpu_culling.bind_group_layout,
                 &BindGroupEntries::sequential((
                     current_input_buffer.as_entire_binding(),
                     previous_input_buffer.as_entire_binding(),
@@ -293,7 +439,38 @@ pub fn prepare_preprocess_bind_groups(
                         size: index_buffer_size,
                     }),
                     data_buffer.as_entire_binding(),
+                    indirect_parameters_buffer.as_entire_binding(),
+                    mesh_culling_data_buffer.as_entire_binding(),
+                    view_uniforms_binding,
                 )),
-            )));
+            ))
+        } else {
+            PreprocessBindGroup(render_device.create_bind_group(
+                "preprocess_direct_bind_group",
+                &pipelines.direct.bind_group_layout,
+                &BindGroupEntries::sequential((
+                    current_input_buffer.as_entire_binding(),
+                    previous_input_buffer.as_entire_binding(),
+                    BindingResource::Buffer(BufferBinding {
+                        buffer: index_buffer,
+                        offset: 0,
+                        size: index_buffer_size,
+                    }),
+                    data_buffer.as_entire_binding(),
+                )),
+            ))
+        };
+
+        commands.entity(*view).insert(bind_group);
     }
 }
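The bind-group code above binds the work-item buffer with an explicit `size` rather than `as_entire_binding`, because a `BufferVec`'s underlying GPU allocation can be larger than its logical length. A sketch of that idiom in isolation, using plain `wgpu` types (the function name is illustrative):

```rust
use std::num::NonZeroU64;

use wgpu::{BindingResource, Buffer, BufferBinding};

/// Bind only the live contents of a growable buffer. Passing `size: None`
/// would instead bind the whole allocation, which may extend past the last
/// valid element.
fn bind_logical_len(buffer: &Buffer, len: u64, element_size: u64) -> BindingResource<'_> {
    BindingResource::Buffer(BufferBinding {
        buffer,
        offset: 0,
        size: NonZeroU64::new(len * element_size),
    })
}
```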
+
+/// Writes the information needed to do GPU mesh culling to the GPU.
+pub fn write_mesh_culling_data_buffer(
+    render_device: Res<RenderDevice>,
+    render_queue: Res<RenderQueue>,
+    mut mesh_culling_data_buffer: ResMut<MeshCullingDataBuffer>,
+) {
+    mesh_culling_data_buffer.write_buffer(&render_device, &render_queue);
+    mesh_culling_data_buffer.clear();
+}
diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs
index e09b162197ef7..74e340c01a33e 100644
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@@ -22,7 +22,6 @@ use bevy_transform::{components::GlobalTransform, prelude::Transform};
 #[cfg(feature = "trace")]
 use bevy_utils::tracing::info_span;
 use bevy_utils::tracing::{error, warn};
-use nonmax::NonMaxU32;
 use std::{hash::Hash, num::NonZeroU64, ops::Range};
 
 use crate::*;
@@ -1734,7 +1733,7 @@ pub struct Shadow {
     pub key: ShadowBinKey,
     pub representative_entity: Entity,
     pub batch_range: Range<u32>,
-    pub dynamic_offset: Option<NonMaxU32>,
+    pub extra_index: PhaseItemExtraIndex,
 }
 
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -1771,13 +1770,13 @@ impl PhaseItem for Shadow {
     }
 
     #[inline]
-    fn dynamic_offset(&self) -> Option<NonMaxU32> {
-        self.dynamic_offset
+    fn extra_index(&self) -> PhaseItemExtraIndex {
+        self.extra_index
     }
 
     #[inline]
-    fn dynamic_offset_mut(&mut self) -> &mut Option<NonMaxU32> {
-        &mut self.dynamic_offset
+    fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range<u32>, &mut PhaseItemExtraIndex) {
+        (&mut self.batch_range, &mut self.extra_index)
     }
 }
 
@@ -1789,13 +1788,13 @@ impl BinnedPhaseItem for Shadow {
         key: Self::BinKey,
         representative_entity: Entity,
         batch_range: Range<u32>,
-        dynamic_offset: Option<NonMaxU32>,
+        extra_index: PhaseItemExtraIndex,
     ) -> Self {
         Shadow {
             key,
             representative_entity,
             batch_range,
-            dynamic_offset,
+            extra_index,
         }
     }
 }
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index 5d326e05909cc..fa3cbdee014f2 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -12,13 +12,17 @@ use bevy_ecs::{
     query::ROQueryItem,
     system::{lifetimeless::*, SystemParamItem, SystemState},
 };
-use bevy_math::{vec3, Affine3, Rect, UVec2, Vec3, Vec4};
+use bevy_math::{Affine3, Rect, UVec2, Vec3, Vec4};
 use bevy_render::{
     batching::{
-        gpu_preprocessing, no_gpu_preprocessing, GetBatchData, GetFullBatchData,
-        NoAutomaticBatching,
+        gpu_preprocessing::{
+            self, GpuPreprocessingSupport, IndirectParameters, IndirectParametersBuffer,
+        },
+        no_gpu_preprocessing, GetBatchData, GetFullBatchData, NoAutomaticBatching,
     },
+    camera::Camera,
     mesh::*,
+    primitives::Aabb,
     render_asset::RenderAssets,
     render_phase::{
         BinnedRenderPhasePlugin, PhaseItem, RenderCommand, RenderCommandResult,
@@ -27,11 +31,11 @@ use bevy_render::{
     render_resource::*,
     renderer::{RenderDevice, RenderQueue},
     texture::{BevyDefault, DefaultImageSampler, ImageSampler, TextureFormatPixelInfo},
-    view::{prepare_view_targets, ViewTarget, ViewUniformOffset, ViewVisibility},
+    view::{prepare_view_targets, GpuCulling, ViewTarget, ViewUniformOffset, ViewVisibility},
     Extract,
 };
 use bevy_transform::components::GlobalTransform;
-use bevy_utils::{tracing::error, Entry, HashMap, Parallel};
+use bevy_utils::{tracing::error, tracing::warn, Entry, HashMap, Parallel};
 
 #[cfg(debug_assertions)]
 use bevy_utils::warn_once;
@@ -139,6 +143,7 @@ impl Plugin for MeshRenderPlugin {
             .init_resource::<SkinIndices>()
             .init_resource::<MorphUniform>()
             .init_resource::<MorphIndices>()
+            .init_resource::<MeshCullingDataBuffer>()
             .add_systems(
                 ExtractSchedule,
                 (
@@ -167,9 +172,12 @@ impl Plugin for MeshRenderPlugin {
         let mut mesh_bindings_shader_defs = Vec::with_capacity(1);
 
         if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
-            let render_device = render_app.world().resource::<RenderDevice>();
+            render_app.init_resource::<GpuPreprocessingSupport>();
+
+            let gpu_preprocessing_support =
+                render_app.world().resource::<GpuPreprocessingSupport>();
             let use_gpu_instance_buffer_builder = self.use_gpu_instance_buffer_builder
-                && gpu_preprocessing::can_preprocess_on_gpu(render_device);
+                && *gpu_preprocessing_support != GpuPreprocessingSupport::None;
 
             let render_mesh_instances = RenderMeshInstances::new(use_gpu_instance_buffer_builder);
             render_app.insert_resource(render_mesh_instances);
@@ -209,6 +217,8 @@ impl Plugin for MeshRenderPlugin {
             );
         };
 
+        let indirect_parameters_buffer = IndirectParametersBuffer::new();
+
         let render_device = render_app.world().resource::<RenderDevice>();
         if let Some(per_object_buffer_batch_size) =
             GpuArrayBuffer::<MeshUniform>::batch_size(render_device)
@@ -219,7 +229,9 @@ impl Plugin for MeshRenderPlugin {
             ));
         }
 
-        render_app.init_resource::<MeshPipeline>();
+        render_app
+            .insert_resource(indirect_parameters_buffer)
+            .init_resource::<MeshPipeline>();
     }
 
     // Load the mesh_bindings shader module here as it depends on runtime information about
@@ -295,6 +307,29 @@ pub struct MeshInputUniform {
     pub previous_input_index: u32,
 }
 
+/// Information about each mesh instance needed to cull it on GPU.
+///
+/// This consists of its axis-aligned bounding box (AABB).
+#[derive(ShaderType, Pod, Zeroable, Clone, Copy)]
+#[repr(C)]
+pub struct MeshCullingData {
+    /// The 3D center of the AABB in model space, padded with an extra unused
+    /// float value.
+    pub aabb_center: Vec4,
+    /// The 3D extents of the AABB in model space, divided by two, padded with
+    /// an extra unused float value.
+    pub aabb_half_extents: Vec4,
+}
+
+/// A GPU buffer that holds the information needed to cull meshes on GPU.
+///
+/// At the moment, this simply holds each mesh's AABB.
+///
+/// To avoid wasting CPU time in the CPU culling case, this buffer will be empty
+/// if GPU culling isn't in use.
+#[derive(Resource, Deref, DerefMut)]
+pub struct MeshCullingDataBuffer(BufferVec<MeshCullingData>);
+
 impl MeshUniform {
     pub fn new(mesh_transforms: &MeshTransforms, maybe_lightmap_uv_rect: Option<Rect>) -> Self {
         let (inverse_transpose_model_a, inverse_transpose_model_b) =
@@ -403,6 +438,57 @@ pub struct RenderMeshInstanceShared {
     pub flags: RenderMeshInstanceFlags,
 }
 
+/// Information that is gathered during the parallel portion of mesh extraction
+/// when GPU mesh uniform building is enabled.
+///
+/// From this, the [`MeshInputUniform`] and [`RenderMeshInstanceGpu`] are
+/// prepared.
+pub struct RenderMeshInstanceGpuBuilder {
+    /// Data that will be placed on the [`RenderMeshInstanceGpu`].
+    pub shared: RenderMeshInstanceShared,
+    /// The current transform.
+    pub transform: Affine3,
+    /// Four 16-bit unsigned normalized UV values packed into a [`UVec2`]:
+    ///
+    /// ```text
+    ///                         <--- MSB                   LSB --->
+    ///                         +---- min v ----+ +---- min u ----+
+    ///     lightmap_uv_rect.x: vvvvvvvv vvvvvvvv uuuuuuuu uuuuuuuu,
+    ///                         +---- max v ----+ +---- max u ----+
+    ///     lightmap_uv_rect.y: VVVVVVVV VVVVVVVV UUUUUUUU UUUUUUUU,
+    ///
+    /// (MSB: most significant bit; LSB: least significant bit.)
+    /// ```
+    pub lightmap_uv_rect: UVec2,
+    /// The index of the previous mesh input.
+    pub previous_input_index: Option<NonMaxU32>,
+    /// Various flags.
+    pub mesh_flags: MeshFlags,
+}
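The `MeshCullingData` struct above stores the AABB center and half-extents as `Vec4` with an unused fourth component. That padding is deliberate: WGSL's `vec3<f32>` has 16-byte alignment, so `Vec4` on the Rust side keeps the `bytemuck`-cast bytes and the shader's view of the buffer identical. An illustrative check, not part of the PR:

```rust
use bevy_math::Vec4;

/// Mirrors the layout choice above: two `Vec4`s occupy exactly 32 bytes with
/// no hidden padding, so a `Pod` byte copy and WGSL agree on every offset.
#[repr(C)]
struct MeshCullingDataLayout {
    aabb_center: Vec4,
    aabb_half_extents: Vec4,
}

#[test]
fn layout_is_tight() {
    assert_eq!(std::mem::size_of::<MeshCullingDataLayout>(), 32);
}
```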
+
+/// The per-thread queues used during [`extract_meshes_for_gpu_building`].
+///
+/// There are two varieties of these: one for when culling happens on CPU and
+/// one for when culling happens on GPU. Having the two varieties avoids wasting
+/// space if GPU culling is disabled.
+#[derive(Default)]
+pub enum RenderMeshInstanceGpuQueue {
+    /// The default value.
+    ///
+    /// This becomes [`RenderMeshInstanceGpuQueue::CpuCulling`] or
+    /// [`RenderMeshInstanceGpuQueue::GpuCulling`] once extraction starts.
+    #[default]
+    None,
+    /// The version of [`RenderMeshInstanceGpuQueue`] that omits the
+    /// [`MeshCullingData`], so that we don't waste space when GPU
+    /// culling is disabled.
+    CpuCulling(Vec<(Entity, RenderMeshInstanceGpuBuilder)>),
+    /// The version of [`RenderMeshInstanceGpuQueue`] that contains the
+    /// [`MeshCullingData`], used when any view has GPU culling
+    /// enabled.
+    GpuCulling(Vec<(Entity, RenderMeshInstanceGpuBuilder, MeshCullingData)>),
+}
+
 impl RenderMeshInstanceShared {
     fn from_components(
         previous_transform: Option<&PreviousGlobalTransform>,
@@ -494,41 +580,147 @@ impl RenderMeshInstances {
     }
 }
 
-pub(crate) trait RenderMeshInstancesTable {
-    /// Returns the ID of the mesh asset attached to the given entity, if any.
-    fn mesh_asset_id(&self, entity: Entity) -> Option<AssetId<Mesh>>;
+impl RenderMeshInstancesCpu {
+    fn mesh_asset_id(&self, entity: Entity) -> Option<AssetId<Mesh>> {
+        self.get(&entity)
+            .map(|render_mesh_instance| render_mesh_instance.mesh_asset_id)
+    }
 
-    /// Constructs [`RenderMeshQueueData`] for the given entity, if it has a
-    /// mesh attached.
-    fn render_mesh_queue_data(&self, entity: Entity) -> Option<RenderMeshQueueData>;
+    fn render_mesh_queue_data(&self, entity: Entity) -> Option<RenderMeshQueueData> {
+        self.get(&entity)
+            .map(|render_mesh_instance| RenderMeshQueueData {
+                shared: &render_mesh_instance.shared,
+                translation: render_mesh_instance.transforms.transform.translation,
+            })
+    }
 }
 
-impl RenderMeshInstancesTable for RenderMeshInstancesCpu {
+impl RenderMeshInstancesGpu {
     fn mesh_asset_id(&self, entity: Entity) -> Option<AssetId<Mesh>> {
-        self.get(&entity).map(|instance| instance.mesh_asset_id)
+        self.get(&entity)
+            .map(|render_mesh_instance| render_mesh_instance.mesh_asset_id)
     }
 
     fn render_mesh_queue_data(&self, entity: Entity) -> Option<RenderMeshQueueData> {
-        self.get(&entity).map(|instance| RenderMeshQueueData {
-            shared: &instance.shared,
-            translation: instance.transforms.transform.translation,
-        })
+        self.get(&entity)
+            .map(|render_mesh_instance| RenderMeshQueueData {
+                shared: &render_mesh_instance.shared,
+                translation: render_mesh_instance.translation,
+            })
     }
 }
 
-impl RenderMeshInstancesTable for RenderMeshInstancesGpu {
-    /// Returns the ID of the mesh asset attached to the given entity, if any.
-    fn mesh_asset_id(&self, entity: Entity) -> Option<AssetId<Mesh>> {
-        self.get(&entity).map(|instance| instance.mesh_asset_id)
+impl RenderMeshInstanceGpuQueue {
+    /// Clears out a [`RenderMeshInstanceGpuQueue`], creating or recreating it
+    /// as necessary.
+    ///
+    /// `any_gpu_culling` should be set to true if any view has GPU culling
+    /// enabled.
+    fn init(&mut self, any_gpu_culling: bool) {
+        match (any_gpu_culling, &mut *self) {
+            (true, RenderMeshInstanceGpuQueue::GpuCulling(queue)) => queue.clear(),
+            (true, _) => *self = RenderMeshInstanceGpuQueue::GpuCulling(vec![]),
+            (false, RenderMeshInstanceGpuQueue::CpuCulling(queue)) => queue.clear(),
+            (false, _) => *self = RenderMeshInstanceGpuQueue::CpuCulling(vec![]),
+        }
     }
 
-    /// Constructs [`RenderMeshQueueData`] for the given entity, if it has a
-    /// mesh attached.
-    fn render_mesh_queue_data(&self, entity: Entity) -> Option<RenderMeshQueueData> {
-        self.get(&entity).map(|instance| RenderMeshQueueData {
-            shared: &instance.shared,
-            translation: instance.translation,
-        })
+    /// Adds a new mesh to this queue.
+    fn push(
+        &mut self,
+        entity: Entity,
+        instance_builder: RenderMeshInstanceGpuBuilder,
+        culling_data_builder: Option<MeshCullingData>,
+    ) {
+        match (&mut *self, culling_data_builder) {
+            (&mut RenderMeshInstanceGpuQueue::CpuCulling(ref mut queue), None) => {
+                queue.push((entity, instance_builder));
+            }
+            (
+                &mut RenderMeshInstanceGpuQueue::GpuCulling(ref mut queue),
+                Some(culling_data_builder),
+            ) => {
+                queue.push((entity, instance_builder, culling_data_builder));
+            }
+            (_, None) => {
+                *self = RenderMeshInstanceGpuQueue::CpuCulling(vec![(entity, instance_builder)]);
+            }
+            (_, Some(culling_data_builder)) => {
+                *self = RenderMeshInstanceGpuQueue::GpuCulling(vec![(
+                    entity,
+                    instance_builder,
+                    culling_data_builder,
+                )]);
+            }
+        }
+    }
+}
+
+impl RenderMeshInstanceGpuBuilder {
+    /// Flushes this mesh instance to the [`RenderMeshInstanceGpu`] and
+    /// [`MeshInputUniform`] tables.
+    fn add_to(
+        self,
+        entity: Entity,
+        render_mesh_instances: &mut EntityHashMap<Entity, RenderMeshInstanceGpu>,
+        current_input_buffer: &mut BufferVec<MeshInputUniform>,
+    ) -> usize {
+        // Push the mesh input uniform.
+        let current_uniform_index = current_input_buffer.push(MeshInputUniform {
+            transform: self.transform.to_transpose(),
+            lightmap_uv_rect: self.lightmap_uv_rect,
+            flags: self.mesh_flags.bits(),
+            previous_input_index: match self.previous_input_index {
+                Some(previous_input_index) => previous_input_index.into(),
+                None => u32::MAX,
+            },
+        });
+
+        // Record the [`RenderMeshInstance`].
+        render_mesh_instances.insert(
+            entity,
+            RenderMeshInstanceGpu {
+                translation: self.transform.translation,
+                shared: self.shared,
+                current_uniform_index: (current_uniform_index as u32)
+                    .try_into()
+                    .unwrap_or_default(),
+            },
+        );
+
+        current_uniform_index
+    }
+}
+
+impl MeshCullingData {
+    /// Returns a new [`MeshCullingData`] initialized with the given AABB.
+    ///
+    /// If no AABB is provided, an infinitely-large one is conservatively
+    /// chosen.
+    fn new(aabb: Option<&Aabb>) -> Self {
+        match aabb {
+            Some(aabb) => MeshCullingData {
+                aabb_center: aabb.center.extend(0.0),
+                aabb_half_extents: aabb.half_extents.extend(0.0),
+            },
+            None => MeshCullingData {
+                aabb_center: Vec3::ZERO.extend(0.0),
+                aabb_half_extents: Vec3::INFINITY.extend(0.0),
+            },
+        }
+    }
+
+    /// Flushes this mesh instance culling data to the
+    /// [`MeshCullingDataBuffer`].
+    fn add_to(&self, mesh_culling_data_buffer: &mut MeshCullingDataBuffer) -> usize {
+        mesh_culling_data_buffer.push(*self)
+    }
+}
+
+impl Default for MeshCullingDataBuffer {
+    #[inline]
+    fn default() -> Self {
+        Self(BufferVec::new(BufferUsages::STORAGE))
+    }
 }
 
@@ -625,8 +817,8 @@ pub fn extract_meshes_for_cpu_building(
     render_mesh_instances.clear();
     for queue in render_mesh_instance_queues.iter_mut() {
-        for (k, v) in queue.drain(..) {
-            render_mesh_instances.insert_unique_unchecked(k, v);
+        for (entity, render_mesh_instance) in queue.drain(..) {
+            render_mesh_instances.insert_unique_unchecked(entity, render_mesh_instance);
         }
     }
 }
@@ -641,9 +833,8 @@ pub fn extract_meshes_for_gpu_building(
     mut batched_instance_buffers: ResMut<
         gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>,
     >,
-    mut render_mesh_instance_queues: Local<
-        Parallel<Vec<(Entity, RenderMeshInstanceShared, MeshInputUniform)>>,
-    >,
+    mut mesh_culling_data_buffer: ResMut<MeshCullingDataBuffer>,
+    mut render_mesh_instance_queues: Local<Parallel<RenderMeshInstanceGpuQueue>>,
     meshes_query: Extract<
         Query<(
            Entity,
@@ -651,6 +842,7 @@ pub fn extract_meshes_for_gpu_building(
             &GlobalTransform,
             Option<&PreviousGlobalTransform>,
             Option<&Lightmap>,
+            Option<&Aabb>,
             &Handle<Mesh>,
             Has<NotShadowReceiver>,
             Has<TransmittedShadowReceiver>,
@@ -658,25 +850,22 @@ pub fn extract_meshes_for_gpu_building(
             Has<NoAutomaticBatching>,
         )>,
     >,
+    cameras_query: Extract<Query<(), (With<Camera>, With<GpuCulling>)>>,
 ) {
+    let any_gpu_culling = !cameras_query.is_empty();
+    for render_mesh_instance_queue in render_mesh_instance_queues.iter_mut() {
+        render_mesh_instance_queue.init(any_gpu_culling);
+    }
+
     // Collect render mesh instances. Build up the uniform buffer.
     let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = *render_mesh_instances
     else {
         panic!(
-            "`collect_render_mesh_instances_for_gpu_building` should only be called if we're \
-            using GPU `MeshUniform` building"
+            "`extract_meshes_for_gpu_building` should only be called if we're \
+            using GPU `MeshUniform` building"
         );
     };
 
-    let gpu_preprocessing::BatchedInstanceBuffers {
-        ref mut current_input_buffer,
-        ref mut previous_input_buffer,
-        ..
-    } = *batched_instance_buffers;
-
-    // Swap buffers.
-    mem::swap(current_input_buffer, previous_input_buffer);
-
     meshes_query.par_iter().for_each_init(
         || render_mesh_instance_queues.borrow_local_mut(),
         |queue,
@@ -686,6 +875,7 @@ pub fn extract_meshes_for_gpu_building(
             transform,
             previous_transform,
             lightmap,
+            aabb,
             handle,
             not_shadow_receiver,
             transmitted_receiver,
@@ -706,54 +896,92 @@ pub fn extract_meshes_for_gpu_building(
                 no_automatic_batching,
             );
 
-            let previous_input_index = shared
-                .flags
-                .contains(RenderMeshInstanceFlags::HAVE_PREVIOUS_TRANSFORM)
-                .then(|| {
-                    render_mesh_instances
-                        .get(&entity)
-                        .map(|render_mesh_instance| {
-                            render_mesh_instance.current_uniform_index.into()
-                        })
-                        .unwrap_or(u32::MAX)
-                })
-                .unwrap_or(u32::MAX);
-
             let lightmap_uv_rect =
                 lightmap::pack_lightmap_uv_rect(lightmap.map(|lightmap| lightmap.uv_rect));
 
-            let affine3: Affine3 = (&transform.affine()).into();
-            queue.push((
-                entity,
+            let gpu_mesh_culling_data = any_gpu_culling.then(|| MeshCullingData::new(aabb));
+
+            let previous_input_index = if shared
+                .flags
+                .contains(RenderMeshInstanceFlags::HAVE_PREVIOUS_TRANSFORM)
+            {
+                render_mesh_instances
+                    .get(&entity)
+                    .map(|render_mesh_instance| render_mesh_instance.current_uniform_index)
+            } else {
+                None
+            };
+
+            let gpu_mesh_instance_builder = RenderMeshInstanceGpuBuilder {
                 shared,
-                MeshInputUniform {
-                    flags: mesh_flags.bits(),
-                    lightmap_uv_rect,
-                    transform: affine3.to_transpose(),
-                    previous_input_index,
-                },
-            ));
+                transform: (&transform.affine()).into(),
+                lightmap_uv_rect,
+                mesh_flags,
+                previous_input_index,
+            };
+
+            queue.push(entity, gpu_mesh_instance_builder, gpu_mesh_culling_data);
         },
     );
+
+    collect_meshes_for_gpu_building(
+        render_mesh_instances,
+        &mut batched_instance_buffers,
+        &mut mesh_culling_data_buffer,
+        &mut render_mesh_instance_queues,
+    );
+}
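GPU culling is opt-in per view: the extraction system above only builds `MeshCullingData` when some camera carries the `GpuCulling` marker. A usage sketch; the exact spawn ergonomics are an assumption, since the diff shows only the render-world side, where views with `GpuCulling` select the culling variant of the preprocessing pipeline:

```rust
use bevy_core_pipeline::core_3d::Camera3dBundle;
use bevy_ecs::system::Commands;
use bevy_render::view::GpuCulling;

/// Opt a camera into GPU frustum culling by adding the marker component.
fn setup(mut commands: Commands) {
    commands.spawn((Camera3dBundle::default(), GpuCulling));
}
```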
+
+/// Creates the [`RenderMeshInstanceGpu`]s and [`MeshInputUniform`]s when GPU
+/// mesh uniforms are built.
+fn collect_meshes_for_gpu_building(
+    render_mesh_instances: &mut RenderMeshInstancesGpu,
+    batched_instance_buffers: &mut gpu_preprocessing::BatchedInstanceBuffers<
+        MeshUniform,
+        MeshInputUniform,
+    >,
+    mesh_culling_data_buffer: &mut MeshCullingDataBuffer,
+    render_mesh_instance_queues: &mut Parallel<RenderMeshInstanceGpuQueue>,
+) {
+    // Collect render mesh instances. Build up the uniform buffer.
+    let gpu_preprocessing::BatchedInstanceBuffers {
+        ref mut current_input_buffer,
+        ref mut previous_input_buffer,
+        ..
+    } = batched_instance_buffers;
+
+    // Swap buffers.
+    mem::swap(current_input_buffer, previous_input_buffer);
+
     // Build the [`RenderMeshInstance`]s and [`MeshInputUniform`]s.
     render_mesh_instances.clear();
+
     for queue in render_mesh_instance_queues.iter_mut() {
-        for (entity, shared, mesh_uniform) in queue.drain(..) {
-            let buffer_index = current_input_buffer.push(mesh_uniform);
-            let translation = vec3(
-                mesh_uniform.transform[0].w,
-                mesh_uniform.transform[1].w,
-                mesh_uniform.transform[2].w,
-            );
-            render_mesh_instances.insert_unique_unchecked(
-                entity,
-                RenderMeshInstanceGpu {
-                    shared,
-                    translation,
-                    current_uniform_index: NonMaxU32::new(buffer_index as u32).unwrap_or_default(),
-                },
-            );
+        match *queue {
+            RenderMeshInstanceGpuQueue::None => {
+                // This can only happen if the queue is empty.
+            }
+            RenderMeshInstanceGpuQueue::CpuCulling(ref mut queue) => {
+                for (entity, mesh_instance_builder) in queue.drain(..) {
+                    mesh_instance_builder.add_to(
+                        entity,
+                        render_mesh_instances,
+                        current_input_buffer,
+                    );
+                }
+            }
+            RenderMeshInstanceGpuQueue::GpuCulling(ref mut queue) => {
+                for (entity, mesh_instance_builder, mesh_culling_builder) in queue.drain(..) {
+                    let instance_data_index = mesh_instance_builder.add_to(
+                        entity,
+                        render_mesh_instances,
+                        current_input_buffer,
+                    );
+                    let culling_data_index =
+                        mesh_culling_builder.add_to(mesh_culling_data_buffer);
+                    debug_assert_eq!(instance_data_index, culling_data_index);
+                }
+            }
         }
     }
 }
@@ -876,7 +1104,11 @@ impl MeshPipeline {
 }
 
 impl GetBatchData for MeshPipeline {
-    type Param = (SRes<RenderMeshInstances>, SRes<RenderLightmaps>);
+    type Param = (
+        SRes<RenderMeshInstances>,
+        SRes<RenderLightmaps>,
+        SRes<RenderAssets<Mesh>>,
+    );
     // The material bind group ID, the mesh ID, and the lightmap ID,
     // respectively.
     type CompareData = (MaterialBindGroupId, AssetId<Mesh>, Option<AssetId<Image>>);
     type BufferData = MeshUniform;
 
     fn get_batch_data(
-        (mesh_instances, lightmaps): &SystemParamItem<Self::Param>,
+        (mesh_instances, lightmaps, _): &SystemParamItem<Self::Param>,
         entity: Entity,
     ) -> Option<(Self::BufferData, Option<Self::CompareData>)> {
         let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
@@ -915,7 +1147,7 @@ impl GetFullBatchData for MeshPipeline {
     type BufferInputData = MeshInputUniform;
 
     fn get_index_and_compare_data(
-        (mesh_instances, lightmaps): &SystemParamItem<Self::Param>,
+        (mesh_instances, lightmaps, _): &SystemParamItem<Self::Param>,
         entity: Entity,
     ) -> Option<(NonMaxU32, Option<Self::CompareData>)> {
         // This should only be called during GPU building.
@@ -941,7 +1173,7 @@ impl GetFullBatchData for MeshPipeline {
     }
 
     fn get_binned_batch_data(
-        (mesh_instances, lightmaps): &SystemParamItem<Self::Param>,
+        (mesh_instances, lightmaps, _): &SystemParamItem<Self::Param>,
         entity: Entity,
     ) -> Option<Self::BufferData> {
         let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
@@ -960,7 +1192,7 @@ impl GetFullBatchData for MeshPipeline {
     }
 
     fn get_binned_index(
-        (mesh_instances, _): &SystemParamItem<Self::Param>,
+        (mesh_instances, _, _): &SystemParamItem<Self::Param>,
         entity: Entity,
     ) -> Option<NonMaxU32> {
         // This should only be called during GPU building.
@@ -976,6 +1208,70 @@ impl GetFullBatchData for MeshPipeline {
             .get(&entity)
             .map(|entity| entity.current_uniform_index)
     }
+
+    fn get_batch_indirect_parameters_index(
+        (mesh_instances, _, meshes): &SystemParamItem<Self::Param>,
+        indirect_parameters_buffer: &mut IndirectParametersBuffer,
+        entity: Entity,
+        instance_index: u32,
+    ) -> Option<NonMaxU32> {
+        get_batch_indirect_parameters_index(
+            mesh_instances,
+            meshes,
+            indirect_parameters_buffer,
+            entity,
+            instance_index,
+        )
+    }
+}
+
+/// Pushes a set of [`IndirectParameters`] onto the [`IndirectParametersBuffer`]
+/// for the given mesh instance, and returns the index of those indirect
+/// parameters.
+fn get_batch_indirect_parameters_index(
+    mesh_instances: &RenderMeshInstances,
+    meshes: &RenderAssets<Mesh>,
+    indirect_parameters_buffer: &mut IndirectParametersBuffer,
+    entity: Entity,
+    instance_index: u32,
+) -> Option<NonMaxU32> {
+    // This should only be called during GPU building.
+    let RenderMeshInstances::GpuBuilding(ref mesh_instances) = *mesh_instances else {
+        error!(
+            "`get_batch_indirect_parameters_index` should never be called in CPU mesh uniform \
+            building mode"
+        );
+        return None;
+    };
+
+    let mesh_instance = mesh_instances.get(&entity)?;
+    let mesh = meshes.get(mesh_instance.mesh_asset_id)?;
+
+    // Note that `IndirectParameters` covers both of these structures, even
+    // though they actually have distinct layouts. See the comment above that
+    // type for more information.
+    let indirect_parameters = match mesh.buffer_info {
+        GpuBufferInfo::Indexed {
+            count: index_count, ..
+        } => IndirectParameters {
+            vertex_or_index_count: index_count,
+            instance_count: 0,
+            first_vertex: 0,
+            base_vertex_or_first_instance: 0,
+            first_instance: instance_index,
+        },
+        GpuBufferInfo::NonIndexed => IndirectParameters {
+            vertex_or_index_count: mesh.vertex_count,
+            instance_count: 0,
+            first_vertex: 0,
+            base_vertex_or_first_instance: instance_index,
+            first_instance: instance_index,
+        },
+    };
+
+    (indirect_parameters_buffer.push(indirect_parameters) as u32)
+        .try_into()
+        .ok()
+}
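The helper above fills out a single `IndirectParameters` struct that stands in for both of `wgpu`'s indirect-draw argument layouts. For reference, the two GPU-side layouts it unions, with field names per wgpu's documented indirect formats:

```rust
/// Arguments consumed by `draw_indirect` (non-indexed draws).
#[repr(C)]
struct DrawIndirectArgs {
    vertex_count: u32,
    instance_count: u32,
    first_vertex: u32,
    first_instance: u32,
}

/// Arguments consumed by `draw_indexed_indirect` (indexed draws).
#[repr(C)]
struct DrawIndexedIndirectArgs {
    index_count: u32,
    instance_count: u32,
    first_index: u32,
    base_vertex: i32,
    first_instance: u32,
}
```

Because `instance_count` occupies the same word in both layouts, the compute shader can atomically bump it without knowing which draw type will eventually consume the parameters; only the meanings of the surrounding words differ, which is why the union struct names them `vertex_or_index_count` and `base_vertex_or_first_instance`.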
 
 bitflags::bitflags! {
@@ -1666,7 +1962,7 @@ impl<P: PhaseItem, const I: usize> RenderCommand<P> for SetMeshBindGroup<I> {
         let mut dynamic_offsets: [u32; 3] = Default::default();
         let mut offset_count = 0;
-        if let Some(dynamic_offset) = item.dynamic_offset() {
+        if let Some(dynamic_offset) = item.extra_index().as_dynamic_offset() {
             dynamic_offsets[offset_count] = dynamic_offset.get();
             offset_count += 1;
         }
@@ -1689,8 +1985,9 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
     type Param = (
         SRes<RenderAssets<Mesh>>,
         SRes<RenderMeshInstances>,
+        SRes<IndirectParametersBuffer>,
         SRes<PipelineCache>,
-        Option<SRes<PreprocessPipeline>>,
+        Option<SRes<PreprocessPipelines>>,
     );
     type ViewQuery = Has<PreprocessBindGroup>;
     type ItemQuery = ();
@@ -1699,25 +1996,15 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
         item: &P,
         has_preprocess_bind_group: ROQueryItem<Self::ViewQuery>,
         _item_query: Option<()>,
-        (meshes, mesh_instances, pipeline_cache, preprocess_pipeline): SystemParamItem<
-            'w,
-            '_,
-            Self::Param,
-        >,
+        (meshes, mesh_instances, indirect_parameters_buffer, pipeline_cache, preprocess_pipelines): SystemParamItem<'w, '_, Self::Param>,
         pass: &mut TrackedRenderPass<'w>,
     ) -> RenderCommandResult {
         // If we're using GPU preprocessing, then we're dependent on that
         // compute shader having been run, which of course can only happen if
         // it's compiled. Otherwise, our mesh instance data won't be present.
-        if let Some(preprocess_pipeline) = preprocess_pipeline {
+        if let Some(preprocess_pipelines) = preprocess_pipelines {
             if !has_preprocess_bind_group
-                || !preprocess_pipeline
-                    .pipeline_id
-                    .is_some_and(|preprocess_pipeline_id| {
-                        pipeline_cache
-                            .get_compute_pipeline(preprocess_pipeline_id)
-                            .is_some()
-                    })
+                || !preprocess_pipelines.pipelines_are_loaded(&pipeline_cache)
             {
                 return RenderCommandResult::Failure;
             }
@@ -1725,6 +2012,7 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
 
         let meshes = meshes.into_inner();
         let mesh_instances = mesh_instances.into_inner();
+        let indirect_parameters_buffer = indirect_parameters_buffer.into_inner();
 
         let Some(mesh_asset_id) = mesh_instances.mesh_asset_id(item.entity()) else {
             return RenderCommandResult::Failure;
@@ -1733,9 +2021,26 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
             return RenderCommandResult::Failure;
         };
 
+        // Calculate the indirect offset, and look up the buffer.
+        let indirect_parameters = match item.extra_index().as_indirect_parameters_index() {
+            None => None,
+            Some(index) => match indirect_parameters_buffer.buffer() {
+                None => {
+                    warn!("Not rendering mesh because indirect parameters buffer wasn't present");
+                    return RenderCommandResult::Failure;
+                }
+                Some(buffer) => Some((
+                    index as u64 * mem::size_of::<IndirectParameters>() as u64,
+                    buffer,
+                )),
+            },
+        };
+
         pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..));
 
         let batch_range = item.batch_range();
+
+        // Draw either directly or indirectly, as appropriate.
         match &gpu_mesh.buffer_info {
             GpuBufferInfo::Indexed {
                 buffer,
@@ -1743,11 +2048,25 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
                 count,
             } => {
                 pass.set_index_buffer(buffer.slice(..), 0, *index_format);
-                pass.draw_indexed(0..*count, 0, batch_range.clone());
-            }
-            GpuBufferInfo::NonIndexed => {
-                pass.draw(0..gpu_mesh.vertex_count, batch_range.clone());
+                match indirect_parameters {
+                    None => {
+                        pass.draw_indexed(0..*count, 0, batch_range.clone());
+                    }
+                    Some((indirect_parameters_offset, indirect_parameters_buffer)) => pass
+                        .draw_indexed_indirect(
+                            indirect_parameters_buffer,
+                            indirect_parameters_offset,
+                        ),
+                }
             }
+            GpuBufferInfo::NonIndexed => match indirect_parameters {
+                None => {
+                    pass.draw(0..gpu_mesh.vertex_count, batch_range.clone());
+                }
+                Some((indirect_parameters_offset, indirect_parameters_buffer)) => {
+                    pass.draw_indirect(indirect_parameters_buffer, indirect_parameters_offset);
+                }
+            },
         }
         RenderCommandResult::Success
     }
diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
index c4adaa5105623..614c9091a6e2a 100644
--- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
+++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl
@@ -9,6 +9,7 @@
 
 #import bevy_pbr::mesh_types::Mesh
 #import bevy_render::maths
+#import bevy_render::view::View
 
 // Per-frame data that the CPU supplies to the GPU.
 struct MeshInput {
@@ -23,15 +24,45 @@ struct MeshInput {
     previous_input_index: u32,
 }
 
+// Information about each mesh instance needed to cull it on GPU.
+//
+// At the moment, this just consists of its axis-aligned bounding box (AABB).
+struct MeshCullingData {
+    // The 3D center of the AABB in model space, padded with an extra unused
+    // float value.
+    aabb_center: vec4<f32>,
+    // The 3D extents of the AABB in model space, divided by two, padded with
+    // an extra unused float value.
+    aabb_half_extents: vec4<f32>,
+}
+
 // One invocation of this compute shader: i.e. one mesh instance in a view.
 struct PreprocessWorkItem {
     // The index of the `MeshInput` in the `current_input` buffer that we read
     // from.
     input_index: u32,
-    // The index of the `Mesh` in `output` that we write to.
+    // In direct mode, the index of the `Mesh` in `output` that we write to. In
+    // indirect mode, the index of the `IndirectParameters` in
+    // `indirect_parameters` that we write to.
     output_index: u32,
 }
 
+// The `wgpu` indirect parameters structure. This is a union of two structures.
+// For more information, see the corresponding comment in
+// `gpu_preprocessing.rs`.
+struct IndirectParameters {
+    // `vertex_count` or `index_count`.
+    data0: u32,
+    // `instance_count` in both structures.
+    instance_count: atomic<u32>,
+    // `first_vertex` in both structures.
+    first_vertex: u32,
+    // `first_instance` or `base_vertex`.
+    data1: u32,
+    // A read-only copy of `instance_index`.
+    instance_index: u32,
+}
+
 // The current frame's `MeshInput`.
 @group(0) @binding(0) var<storage> current_input: array<MeshInput>;
 // The `MeshInput` values from the previous frame.
@@ -43,20 +74,82 @@ struct PreprocessWorkItem {
 // The output array of `Mesh`es.
 @group(0) @binding(3) var<storage, read_write> output: array<Mesh>;
 
+#ifdef INDIRECT
+// The array of indirect parameters for drawcalls.
+@group(0) @binding(4) var<storage, read_write> indirect_parameters: array<IndirectParameters>;
+#endif
+
+#ifdef FRUSTUM_CULLING
+// Data needed to cull the meshes.
+//
+// At the moment, this consists only of AABBs.
+@group(0) @binding(5) var<storage> mesh_culling_data: array<MeshCullingData>;
+
+// The view data, including the view matrix.
+@group(0) @binding(6) var<uniform> view: View;
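The WGSL function below tests an oriented bounding box against the frustum using the "relative radius" trick: projecting the box's half-extents onto each plane normal (via the model matrix columns) yields the radius of a sphere that conservatively bounds the box with respect to that plane, so the cheap sphere-vs-plane test suffices. A CPU mirror of one plane check, shown only to make the math legible (names are illustrative; `plane` is `(n, d)` with the normal pointing into the frustum):

```rust
use bevy_math::{Mat4, Vec3, Vec4};

/// Returns true if the OBB lies entirely outside the given frustum plane.
/// `center_world.w` must be 1.0 so the dot product picks up the plane's
/// `d` offset, matching the WGSL below.
fn obb_outside_plane(plane: Vec4, model: Mat4, center_world: Vec4, half_extents: Vec3) -> bool {
    // Half-extents projected onto the plane normal, in world space.
    let relative_radius = Vec3::new(
        plane.dot(model.col(0)).abs(),
        plane.dot(model.col(1)).abs(),
        plane.dot(model.col(2)).abs(),
    )
    .dot(half_extents);
    plane.dot(center_world) + relative_radius < 0.0
}
```

Note that the shader loops over only five of the six frustum planes, presumably matching Bevy's convention of an infinite far plane that can never reject anything.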
+fn view_frustum_intersects_obb( + model: mat4x4<f32>, + aabb_center: vec4<f32>, + aabb_half_extents: vec3<f32>, +) -> bool { + + for (var i = 0; i < 5; i += 1) { + // Calculate relative radius of the sphere associated with this plane. + let plane_normal = view.frustum[i]; + let relative_radius = dot( + abs( + vec3( + dot(plane_normal, model[0]), + dot(plane_normal, model[1]), + dot(plane_normal, model[2]), + ) + ), + aabb_half_extents + ); + + // Check the frustum plane. + if (!maths::sphere_intersects_plane_half_space( + plane_normal, aabb_center, relative_radius)) { + return false; + } + } + + return true; +} +#endif + @compute @workgroup_size(64) fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) { + // Figure out our instance index. If this thread doesn't correspond to any + // index, bail. let instance_index = global_invocation_id.x; if (instance_index >= arrayLength(&work_items)) { return; } // Unpack. - let mesh_index = work_items[instance_index].input_index; + let input_index = work_items[instance_index].input_index; let output_index = work_items[instance_index].output_index; - let model_affine_transpose = current_input[mesh_index].model; + let model_affine_transpose = current_input[input_index].model; let model = maths::affine3_to_square(model_affine_transpose); + // Cull if necessary. +#ifdef FRUSTUM_CULLING + let aabb_center = mesh_culling_data[input_index].aabb_center.xyz; + let aabb_half_extents = mesh_culling_data[input_index].aabb_half_extents.xyz; + + // Do an OBB-based frustum cull. + let model_center = model * vec4(aabb_center, 1.0); + if (!view_frustum_intersects_obb(model, model_center, aabb_half_extents)) { + return; + } +#endif + // Calculate inverse transpose. let inverse_transpose_model = transpose(maths::inverse_affine3(transpose( model_affine_transpose))); @@ -68,7 +161,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) { let inverse_transpose_model_b = inverse_transpose_model[2].z; // Look up the previous model matrix. - let previous_input_index = current_input[mesh_index].previous_input_index; + let previous_input_index = current_input[input_index].previous_input_index; var previous_model: mat3x4<f32>; if (previous_input_index == 0xffffffff) { previous_model = model_affine_transpose; @@ -76,11 +169,21 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) { previous_model = previous_input[previous_input_index].model; } + // Figure out the output index. In indirect mode, this involves bumping the + // instance index in the indirect parameters structure. Otherwise, this + // index was directly supplied to us. +#ifdef INDIRECT + let mesh_output_index = indirect_parameters[output_index].instance_index + + atomicAdd(&indirect_parameters[output_index].instance_count, 1u); +#else + let mesh_output_index = output_index; +#endif + + // Write the output.
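To make the relative-radius test above easier to follow, here is a CPU-side sketch of the same OBB-versus-frustum check, written against `glam` (which `bevy_math` re-exports). This is only an illustration of the math; the real test runs in the compute shader above, and the five-plane array is assumed to match the layout of `view.frustum`:

```rust
use glam::{Mat4, Vec3, Vec4};

/// True if any part of a sphere lies on the positive (inside) half-space of a
/// plane. `center.w` must be 1.0 so the plane's `d` term enters the dot product.
fn sphere_intersects_plane_half_space(plane: Vec4, center: Vec4, radius: f32) -> bool {
    plane.dot(center) + radius > 0.0
}

/// CPU-side mirror of `view_frustum_intersects_obb` above (a sketch only).
fn frustum_intersects_obb(
    frustum: &[Vec4; 5],
    model: Mat4,
    world_center: Vec4, // model * vec4(aabb_center, 1.0)
    half_extents: Vec3,
) -> bool {
    frustum.iter().all(|&plane| {
        // Project the box's model-space axes onto the plane normal: the result
        // is the radius of the sphere that bounds the OBB "as seen" by this
        // plane, reducing the OBB test to a cheap sphere-plane test.
        let relative_radius = Vec3::new(
            plane.dot(model.x_axis).abs(),
            plane.dot(model.y_axis).abs(),
            plane.dot(model.z_axis).abs(),
        )
        .dot(half_extents);
        sphere_intersects_plane_half_space(plane, world_center, relative_radius)
    })
}
```

Note also how the indirect path builds its draws: each instance that survives this test reserves an output slot with the `atomicAdd` on `instance_count`, which compacts the surviving instances and accumulates the drawcall's instance count in a single step.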
- output[output_index].model = model_affine_transpose; - output[output_index].previous_model = previous_model; - output[output_index].inverse_transpose_model_a = inverse_transpose_model_a; - output[output_index].inverse_transpose_model_b = inverse_transpose_model_b; - output[output_index].flags = current_input[mesh_index].flags; - output[output_index].lightmap_uv_rect = current_input[mesh_index].lightmap_uv_rect; + output[mesh_output_index].model = model_affine_transpose; + output[mesh_output_index].previous_model = previous_model; + output[mesh_output_index].inverse_transpose_model_a = inverse_transpose_model_a; + output[mesh_output_index].inverse_transpose_model_b = inverse_transpose_model_b; + output[mesh_output_index].flags = current_input[input_index].flags; + output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; } diff --git a/crates/bevy_render/src/batching/gpu_preprocessing.rs b/crates/bevy_render/src/batching/gpu_preprocessing.rs index 22bebd8bf94e9..661ff7f301ed9 100644 --- a/crates/bevy_render/src/batching/gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/gpu_preprocessing.rs @@ -1,29 +1,74 @@ //! Batching functionality when GPU preprocessing is in use. -use std::marker::PhantomData; - +use bevy_app::{App, Plugin}; +use bevy_derive::{Deref, DerefMut}; use bevy_ecs::{ entity::Entity, - query::With, + query::{Has, With}, + schedule::IntoSystemConfigs as _, system::{Query, Res, ResMut, Resource, StaticSystemParam}, + world::{FromWorld, World}, }; use bevy_encase_derive::ShaderType; use bevy_utils::EntityHashMap; use bytemuck::{Pod, Zeroable}; +use nonmax::NonMaxU32; use smallvec::smallvec; -use wgpu::{BindingResource, BufferUsages}; +use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features}; use crate::{ render_phase::{ BinnedPhaseItem, BinnedRenderPhase, BinnedRenderPhaseBatch, CachedRenderPipelinePhaseItem, - SortedPhaseItem, SortedRenderPhase, + PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices, }, - render_resource::{BufferVec, GpuArrayBufferIndex, GpuArrayBufferable, UninitBufferVec}, - renderer::{RenderDevice, RenderQueue}, - view::ViewTarget, + render_resource::{BufferVec, GpuArrayBufferable, UninitBufferVec}, + renderer::{RenderAdapter, RenderDevice, RenderQueue}, + view::{GpuCulling, ViewTarget}, + Render, RenderApp, RenderSet, }; -use super::GetFullBatchData; +use super::{BatchMeta, GetBatchData, GetFullBatchData}; + +pub struct BatchingPlugin; + +impl Plugin for BatchingPlugin { + fn build(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app.add_systems( + Render, + write_indirect_parameters_buffer.in_set(RenderSet::PrepareResourcesFlush), + ); + } + + fn finish(&self, app: &mut App) { + let Some(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app.init_resource::(); + } +} + +/// Records whether GPU preprocessing and/or GPU culling are supported on the +/// device. +/// +/// No GPU preprocessing is supported on WebGL because of the lack of compute +/// shader support. GPU preprocessing is supported on DirectX 12, but due to [a +/// `wgpu` limitation] GPU culling is not. +/// +/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471 +#[derive(Clone, Copy, PartialEq, Resource)] +pub enum GpuPreprocessingSupport { + /// No GPU preprocessing support is available at all. + None, + /// GPU preprocessing is available, but GPU culling isn't. 
+ PreprocessingOnly, + /// Both GPU preprocessing and GPU culling are available. + Culling, +} /// The GPU buffers holding the data needed to render batches. /// @@ -51,7 +96,7 @@ where /// corresponds to each instance. /// /// This is keyed off each view. Each view has a separate buffer. - pub work_item_buffers: EntityHashMap<Entity, BufferVec<PreprocessWorkItem>>, + pub work_item_buffers: EntityHashMap<Entity, PreprocessWorkItemBuffer>, /// The uniform data inputs for the current frame. /// @@ -68,6 +113,14 @@ where pub previous_input_buffer: BufferVec<BDI>, } +/// The buffer of GPU preprocessing work items for a single view. +pub struct PreprocessWorkItemBuffer { + /// The buffer of work items. + pub buffer: BufferVec<PreprocessWorkItem>, + /// True if we're using GPU culling. + pub gpu_culling: bool, +} + /// One invocation of the preprocessing shader: i.e. one mesh instance in a /// view. #[derive(Clone, Copy, Pod, Zeroable, ShaderType)] @@ -76,10 +129,114 @@ pub struct PreprocessWorkItem { /// The index of the batch input data in the input buffer that the shader /// reads from. pub input_index: u32, - /// The index of the `MeshUniform` in the output buffer that we write to. + /// In direct mode, this is the index of the `MeshUniform` in the output + /// buffer that we write to. In indirect mode, this is the index of the + /// [`IndirectParameters`]. pub output_index: u32, } +/// The `wgpu` indirect parameters structure. +/// +/// This is actually a union of the two following structures: +/// +/// ``` +/// #[repr(C)] +/// struct ArrayIndirectParameters { +/// vertex_count: u32, +/// instance_count: u32, +/// first_vertex: u32, +/// first_instance: u32, +/// } +/// +/// #[repr(C)] +/// struct ElementIndirectParameters { +/// index_count: u32, +/// instance_count: u32, +/// first_vertex: u32, +/// base_vertex: u32, +/// first_instance: u32, +/// } +/// ``` +/// +/// We actually generally treat these two variants identically in code. To do +/// that, we make the following two observations: +/// +/// 1. `instance_count` is in the same place in both structures. So we can +/// access it regardless of the structure we're looking at. +/// +/// 2. The second structure is one word larger than the first. Thus we need to +/// pad out the first structure by one word in order to place both structures in +/// an array. If we pad out `ArrayIndirectParameters` by copying the +/// `first_instance` field into the padding, then the resulting union structure +/// will always have a read-only copy of `first_instance` in the final word. We +/// take advantage of this in the shader to reduce branching. +#[derive(Clone, Copy, Pod, Zeroable, ShaderType)] +#[repr(C)] +pub struct IndirectParameters { + /// For `ArrayIndirectParameters`, `vertex_count`; for + /// `ElementIndirectParameters`, `index_count`. + pub vertex_or_index_count: u32, + + /// The number of instances we're going to draw. + /// + /// This field is in the same place in both structures. + pub instance_count: u32, + + /// The index of the first vertex we're to draw. + pub first_vertex: u32, + + /// For `ArrayIndirectParameters`, `first_instance`; for + /// `ElementIndirectParameters`, `base_vertex`. + pub base_vertex_or_first_instance: u32, + + /// For `ArrayIndirectParameters`, this is padding; for + /// `ElementIndirectParameters`, this is `first_instance`. + /// + /// Conventionally, we copy `first_instance` into this field when padding + /// out `ArrayIndirectParameters`. That way, shader code can read this value + /// at the same place, regardless of the specific structure this represents.
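Observation 2 above is easy to verify mechanically. A compile-time sketch, using the two structures exactly as written in the doc comment, shows that one padding word makes the layouts coincide:

```rust
#[repr(C)]
struct ArrayIndirectParameters {
    vertex_count: u32,
    instance_count: u32,
    first_vertex: u32,
    first_instance: u32,
}

#[repr(C)]
struct ElementIndirectParameters {
    index_count: u32,
    instance_count: u32,
    first_vertex: u32,
    base_vertex: u32,
    first_instance: u32,
}

// One extra word makes the two variants the same size, so both can live in a
// single `IndirectParameters` array with a common stride.
const _: () = assert!(
    core::mem::size_of::<ArrayIndirectParameters>() + core::mem::size_of::<u32>()
        == core::mem::size_of::<ElementIndirectParameters>()
);
```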
+ pub first_instance: u32, +} + +/// The buffer containing the list of [`IndirectParameters`], for draw commands. +#[derive(Resource, Deref, DerefMut)] +pub struct IndirectParametersBuffer(pub BufferVec); + +impl IndirectParametersBuffer { + /// Creates the indirect parameters buffer. + pub fn new() -> IndirectParametersBuffer { + IndirectParametersBuffer(BufferVec::new( + BufferUsages::STORAGE | BufferUsages::INDIRECT, + )) + } +} + +impl Default for IndirectParametersBuffer { + fn default() -> Self { + Self::new() + } +} + +impl FromWorld for GpuPreprocessingSupport { + fn from_world(world: &mut World) -> Self { + let adapter = world.resource::(); + let device = world.resource::(); + + if device.limits().max_compute_workgroup_size_x == 0 { + GpuPreprocessingSupport::None + } else if !device + .features() + .contains(Features::INDIRECT_FIRST_INSTANCE) || + !adapter.get_downlevel_capabilities().flags.contains( + DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW) + { + GpuPreprocessingSupport::PreprocessingOnly + } else { + GpuPreprocessingSupport::Culling + } + } +} + impl BatchedInstanceBuffers where BD: GpuArrayBufferable + Sync + Send + 'static, @@ -110,7 +267,7 @@ where self.current_input_buffer.clear(); self.previous_input_buffer.clear(); for work_item_buffer in self.work_item_buffers.values_mut() { - work_item_buffer.clear(); + work_item_buffer.buffer.clear(); } } } @@ -125,6 +282,51 @@ where } } +/// Information about a render batch that we're building up during a sorted +/// render phase. +struct SortedRenderBatch +where + F: GetBatchData, +{ + /// The index of the first phase item in this batch in the list of phase + /// items. + phase_item_start_index: u32, + + /// The index of the first instance in this batch in the instance buffer. + instance_start_index: u32, + + /// The index of the indirect parameters for this batch in the + /// [`IndirectParametersBuffer`]. + /// + /// If CPU culling is being used, then this will be `None`. + indirect_parameters_index: Option, + + /// Metadata that can be used to determine whether an instance can be placed + /// into this batch. + meta: BatchMeta, +} + +impl SortedRenderBatch +where + F: GetBatchData, +{ + /// Finalizes this batch and updates the [`SortedRenderPhase`] with the + /// appropriate indices. + /// + /// `instance_end_index` is the index of the last instance in this batch + /// plus one. + fn flush(self, instance_end_index: u32, phase: &mut SortedRenderPhase) + where + I: CachedRenderPipelinePhaseItem + SortedPhaseItem, + { + let (batch_range, batch_extra_index) = + phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut(); + *batch_range = self.instance_start_index..instance_end_index; + *batch_extra_index = + PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index); + } +} + /// A system that runs early in extraction and clears out all the /// [`BatchedInstanceBuffers`] for the frame. /// @@ -166,53 +368,126 @@ pub fn delete_old_work_item_buffers( /// is in use. This means comparing metadata needed to draw each phase item and /// trying to combine the draws into a batch. 
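For code elsewhere that needs to pick a path based on this detection, a minimal sketch (the system name is hypothetical; the resource and its variants are the ones defined above):

```rust
use bevy_ecs::system::Res;

// Hypothetical render-world system that branches on the detected support level.
fn choose_preprocessing_path(support: Res<GpuPreprocessingSupport>) {
    match *support {
        // No compute shaders (e.g. WebGL 2): build batch data on the CPU.
        GpuPreprocessingSupport::None => { /* CPU batching path */ }
        // Compute shaders, but no reliable first-instance indexing in indirect
        // draws (e.g. DirectX 12, per the `wgpu` limitation noted above):
        // build mesh uniforms on the GPU but keep culling on the CPU.
        GpuPreprocessingSupport::PreprocessingOnly => { /* hybrid path */ }
        // Full GPU-driven path: GPU transforms plus GPU frustum culling.
        GpuPreprocessingSupport::Culling => { /* indirect-draw path */ }
    }
}
```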
pub fn batch_and_prepare_sorted_render_phase( - gpu_batched_instance_buffers: ResMut< - BatchedInstanceBuffers, - >, - mut views: Query<(Entity, &mut SortedRenderPhase)>, - param: StaticSystemParam, + gpu_array_buffer: ResMut>, + mut indirect_parameters_buffer: ResMut, + mut views: Query<(Entity, &mut SortedRenderPhase, Has)>, + system_param_item: StaticSystemParam, ) where I: CachedRenderPipelinePhaseItem + SortedPhaseItem, GFBD: GetFullBatchData, { - let system_param_item = param.into_inner(); - // We only process GPU-built batch data in this function. let BatchedInstanceBuffers { ref mut data_buffer, ref mut work_item_buffers, .. - } = gpu_batched_instance_buffers.into_inner(); + } = gpu_array_buffer.into_inner(); - for (view, mut phase) in &mut views { + for (view, mut phase, gpu_culling) in &mut views { // Create the work item buffer if necessary. - let work_item_buffer = work_item_buffers - .entry(view) - .or_insert_with(|| BufferVec::new(BufferUsages::STORAGE)); + let work_item_buffer = + work_item_buffers + .entry(view) + .or_insert_with(|| PreprocessWorkItemBuffer { + buffer: BufferVec::new(BufferUsages::STORAGE), + gpu_culling, + }); - super::batch_and_prepare_sorted_render_phase::(&mut phase, |item| { - let (input_index, compare_data) = - GFBD::get_index_and_compare_data(&system_param_item, item.entity())?; - let output_index = data_buffer.add() as u32; + // Walk through the list of phase items, building up batches as we go. + let mut batch: Option> = None; + for current_index in 0..phase.items.len() { + // Get the index of the input data, and comparison metadata, for + // this entity. + let current_batch_input_index = GFBD::get_index_and_compare_data( + &system_param_item, + phase.items[current_index].entity(), + ); + + // Unpack that index and metadata. Note that it's possible for index + // and/or metadata to not be present, which signifies that this + // entity is unbatchable. In that case, we break the batch here and + // otherwise ignore the phase item. + let (current_input_index, current_meta); + match current_batch_input_index { + Some((input_index, Some(current_compare_data))) => { + current_input_index = Some(input_index); + current_meta = Some(BatchMeta::new( + &phase.items[current_index], + current_compare_data, + )); + } + _ => { + current_input_index = None; + current_meta = None; + } + } - work_item_buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index, + // Determine if this entity can be included in the batch we're + // building up. + let can_batch = batch.as_ref().is_some_and(|batch| { + current_meta + .as_ref() + .is_some_and(|current_meta| batch.meta == *current_meta) }); - *item.batch_range_mut() = output_index..output_index + 1; + // Make space in the data buffer for this instance. + let current_entity = phase.items[current_index].entity(); + let output_index = data_buffer.add() as u32; + + // If we can't batch, break the existing batch and make a new one. + if !can_batch { + // Break a batch if we need to. + if let Some(batch) = batch.take() { + batch.flush(output_index, &mut phase); + } + + // Start a new batch. 
+ batch = current_meta.map(|meta| { + let indirect_parameters_index = if gpu_culling { + GFBD::get_batch_indirect_parameters_index( + &system_param_item, + &mut indirect_parameters_buffer, + current_entity, + output_index, + ) + } else { + None + }; + SortedRenderBatch { + phase_item_start_index: current_index as u32, + instance_start_index: output_index, + indirect_parameters_index, + meta, + } + }); + } - compare_data - }); + // Add a new preprocessing work item so that the preprocessing + // shader will copy the per-instance data over. + if let (Some(batch), Some(input_index)) = (batch.as_ref(), current_input_index.as_ref()) + { + work_item_buffer.buffer.push(PreprocessWorkItem { + input_index: (*input_index).into(), + output_index: match batch.indirect_parameters_index { + Some(indirect_parameters_index) => indirect_parameters_index.into(), + None => output_index, + }, + }); + } + } + + // Flush the final batch if necessary. + if let Some(batch) = batch.take() { + batch.flush(data_buffer.len() as u32, &mut phase); + } } } /// Creates batches for a render phase that uses bins. pub fn batch_and_prepare_binned_render_phase( - gpu_batched_instance_buffers: ResMut< - BatchedInstanceBuffers, - >, - mut views: Query<(Entity, &mut BinnedRenderPhase)>, + gpu_array_buffer: ResMut>, + mut indirect_parameters_buffer: ResMut, + mut views: Query<(Entity, &mut BinnedRenderPhase, Has)>, param: StaticSystemParam, ) where BPI: BinnedPhaseItem, @@ -224,16 +499,20 @@ pub fn batch_and_prepare_binned_render_phase( ref mut data_buffer, ref mut work_item_buffers, .. - } = gpu_batched_instance_buffers.into_inner(); + } = gpu_array_buffer.into_inner(); - for (view, mut phase) in &mut views { + for (view, mut phase, gpu_culling) in &mut views { let phase = &mut *phase; // Borrow checker. // Create the work item buffer if necessary; otherwise, just mark it as // used this frame. - let work_item_buffer = work_item_buffers - .entry(view) - .or_insert_with(|| BufferVec::new(BufferUsages::STORAGE)); + let work_item_buffer = + work_item_buffers + .entry(view) + .or_insert_with(|| PreprocessWorkItemBuffer { + buffer: BufferVec::new(BufferUsages::STORAGE), + gpu_culling, + }); // Prepare batchables. 
@@ -245,19 +524,50 @@ pub fn batch_and_prepare_binned_render_phase( }; let output_index = data_buffer.add() as u32; - work_item_buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index, - }); - - batch - .get_or_insert(BinnedRenderPhaseBatch { - representative_entity: entity, - instance_range: output_index..output_index, - dynamic_offset: None, - }) - .instance_range - .end = output_index + 1; + match batch { + Some(ref mut batch) => { + batch.instance_range.end = output_index + 1; + work_item_buffer.buffer.push(PreprocessWorkItem { + input_index: input_index.into(), + output_index: batch + .extra_index + .as_indirect_parameters_index() + .unwrap_or(output_index), + }); + } + + None if gpu_culling => { + let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index( + &system_param_item, + &mut indirect_parameters_buffer, + entity, + output_index, + ); + work_item_buffer.buffer.push(PreprocessWorkItem { + input_index: input_index.into(), + output_index: indirect_parameters_index.unwrap_or_default().into(), + }); + batch = Some(BinnedRenderPhaseBatch { + representative_entity: entity, + instance_range: output_index..output_index + 1, + extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index( + indirect_parameters_index, + ), + }); + } + + None => { + work_item_buffer.buffer.push(PreprocessWorkItem { + input_index: input_index.into(), + output_index, + }); + batch = Some(BinnedRenderPhaseBatch { + representative_entity: entity, + instance_range: output_index..output_index + 1, + extra_index: PhaseItemExtraIndex::NONE, + }); + } + } } if let Some(batch) = batch { @@ -274,18 +584,38 @@ pub fn batch_and_prepare_binned_render_phase( }; let output_index = data_buffer.add() as u32; - work_item_buffer.push(PreprocessWorkItem { - input_index: input_index.into(), - output_index, - }); - - unbatchables - .buffer_indices - .add(GpuArrayBufferIndex:: { - index: output_index, - dynamic_offset: None, - element_type: PhantomData, + if gpu_culling { + let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index( + &system_param_item, + &mut indirect_parameters_buffer, + entity, + output_index, + ) + .unwrap_or_default(); + work_item_buffer.buffer.push(PreprocessWorkItem { + input_index: input_index.into(), + output_index: indirect_parameters_index.into(), + }); + unbatchables + .buffer_indices + .add(UnbatchableBinnedEntityIndices { + instance_index: indirect_parameters_index.into(), + extra_index: PhaseItemExtraIndex::indirect_parameters_index( + indirect_parameters_index.into(), + ), + }); + } else { + work_item_buffer.buffer.push(PreprocessWorkItem { + input_index: input_index.into(), + output_index, }); + unbatchables + .buffer_indices + .add(UnbatchableBinnedEntityIndices { + instance_index: output_index, + extra_index: PhaseItemExtraIndex::NONE, + }); + } } } } @@ -295,29 +625,34 @@ pub fn batch_and_prepare_binned_render_phase( pub fn write_batched_instance_buffers( render_device: Res, render_queue: Res, - mut gpu_batched_instance_buffers: ResMut< - BatchedInstanceBuffers, - >, + gpu_array_buffer: ResMut>, ) where GFBD: GetFullBatchData, { - gpu_batched_instance_buffers - .data_buffer - .write_buffer(&render_device); - gpu_batched_instance_buffers - .current_input_buffer - .write_buffer(&render_device, &render_queue); + let BatchedInstanceBuffers { + ref mut data_buffer, + work_item_buffers: ref mut index_buffers, + ref mut current_input_buffer, + previous_input_buffer: _, + } = gpu_array_buffer.into_inner(); + + 
data_buffer.write_buffer(&render_device); + current_input_buffer.write_buffer(&render_device, &render_queue); // There's no need to write `previous_input_buffer`, as we wrote // that on the previous frame, and it hasn't changed. - for work_item_buffer in gpu_batched_instance_buffers.work_item_buffers.values_mut() { - work_item_buffer.write_buffer(&render_device, &render_queue); + for index_buffer in index_buffers.values_mut() { + index_buffer + .buffer + .write_buffer(&render_device, &render_queue); } } -/// Determines whether it's possible to run preprocessing on the GPU. -/// -/// Currently, this simply checks to see whether compute shaders are supported. -pub fn can_preprocess_on_gpu(render_device: &RenderDevice) -> bool { - render_device.limits().max_compute_workgroup_size_x > 0 +pub fn write_indirect_parameters_buffer( + render_device: Res, + render_queue: Res, + mut indirect_parameters_buffer: ResMut, +) { + indirect_parameters_buffer.write_buffer(&render_device, &render_queue); + indirect_parameters_buffer.clear(); } diff --git a/crates/bevy_render/src/batching/mod.rs b/crates/bevy_render/src/batching/mod.rs index 3ce9aaf38bb10..0e1a6ada7ceb6 100644 --- a/crates/bevy_render/src/batching/mod.rs +++ b/crates/bevy_render/src/batching/mod.rs @@ -14,6 +14,8 @@ use crate::{ render_resource::{CachedRenderPipelineId, GpuArrayBufferable}, }; +use self::gpu_preprocessing::IndirectParametersBuffer; + pub mod gpu_preprocessing; pub mod no_gpu_preprocessing; @@ -52,7 +54,7 @@ impl BatchMeta { BatchMeta { pipeline_id: item.cached_pipeline(), draw_function_id: item.draw_function(), - dynamic_offset: item.dynamic_offset(), + dynamic_offset: item.extra_index().as_dynamic_offset(), user_data, } } @@ -133,6 +135,19 @@ pub trait GetFullBatchData: GetBatchData { param: &SystemParamItem, query_item: Entity, ) -> Option; + + /// Pushes [`gpu_preprocessing::IndirectParameters`] necessary to draw this + /// batch onto the given [`IndirectParametersBuffer`], and returns its + /// index. + /// + /// This is only used if GPU culling is enabled (which requires GPU + /// preprocessing). + fn get_batch_indirect_parameters_index( + param: &SystemParamItem, + indirect_parameters_buffer: &mut IndirectParametersBuffer, + entity: Entity, + instance_index: u32, + ) -> Option; } /// Sorts a render phase that uses bins. 
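To make the new trait method concrete, here is a sketch of what an indexed-mesh implementation might look like. `lookup_index_count` is a hypothetical helper standing in for however the implementor fetches the mesh's index count; the real `bevy_pbr` implementation differs in its details:

```rust
// Inside some `impl GetFullBatchData for MyBatchSetup` (illustrative only).
fn get_batch_indirect_parameters_index(
    param: &SystemParamItem<Self::Param>,
    indirect_parameters_buffer: &mut IndirectParametersBuffer,
    entity: Entity,
    instance_index: u32,
) -> Option<NonMaxU32> {
    // Hypothetical helper; see the lead-in above.
    let index_count = lookup_index_count(param, entity)?;

    // Push one set of indirect parameters. `instance_count` starts at zero:
    // the preprocessing shader bumps it once per instance that survives
    // culling, so a fully culled batch draws nothing.
    let index = indirect_parameters_buffer.push(IndirectParameters {
        vertex_or_index_count: index_count,
        instance_count: 0,
        first_vertex: 0,
        base_vertex_or_first_instance: 0,
        // The shader adds its `atomicAdd` result to this to find each
        // instance's output slot.
        first_instance: instance_index,
    });
    NonMaxU32::new(index as u32)
}
```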
diff --git a/crates/bevy_render/src/batching/no_gpu_preprocessing.rs b/crates/bevy_render/src/batching/no_gpu_preprocessing.rs index 3387243d13dbd..15dfa7842a009 100644 --- a/crates/bevy_render/src/batching/no_gpu_preprocessing.rs +++ b/crates/bevy_render/src/batching/no_gpu_preprocessing.rs @@ -8,7 +8,7 @@ use wgpu::BindingResource; use crate::{ render_phase::{ BinnedPhaseItem, BinnedRenderPhase, BinnedRenderPhaseBatch, CachedRenderPipelinePhaseItem, - SortedPhaseItem, SortedRenderPhase, + PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, }, render_resource::{GpuArrayBuffer, GpuArrayBufferable}, renderer::{RenderDevice, RenderQueue}, @@ -79,8 +79,9 @@ pub fn batch_and_prepare_sorted_render_phase( let buffer_index = batched_instance_buffer.push(buffer_data); let index = buffer_index.index; - *item.batch_range_mut() = index..index + 1; - *item.dynamic_offset_mut() = buffer_index.dynamic_offset; + let (batch_range, extra_index) = item.batch_range_and_extra_index_mut(); + *batch_range = index..index + 1; + *extra_index = PhaseItemExtraIndex::maybe_dynamic_offset(buffer_index.dynamic_offset); compare_data }); @@ -90,13 +91,14 @@ pub fn batch_and_prepare_sorted_render_phase( /// Creates batches for a render phase that uses bins, when GPU batch data /// building isn't in use. pub fn batch_and_prepare_binned_render_phase( - mut buffer: ResMut>, + gpu_array_buffer: ResMut>, mut views: Query<&mut BinnedRenderPhase>, param: StaticSystemParam, ) where BPI: BinnedPhaseItem, GFBD: GetFullBatchData, { + let gpu_array_buffer = gpu_array_buffer.into_inner(); let system_param_item = param.into_inner(); for mut phase in &mut views { @@ -111,7 +113,7 @@ pub fn batch_and_prepare_binned_render_phase( else { continue; }; - let instance = buffer.push(buffer_data); + let instance = gpu_array_buffer.push(buffer_data); // If the dynamic offset has changed, flush the batch. // @@ -120,12 +122,15 @@ pub fn batch_and_prepare_binned_render_phase( // with no storage buffers. 
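The batch-extension rule used in the binned CPU path can be stated compactly: an instance may join the open batch only if it lands exactly at the end of the batch's instance range and carries the same extra index. A sketch of that predicate (a hypothetical helper; the real check is inlined just below):

```rust
use nonmax::NonMaxU32;

// Hypothetical predicate mirroring the inline check below.
fn can_extend(
    batch: &BinnedRenderPhaseBatch,
    instance_index: u32,
    dynamic_offset: Option<NonMaxU32>,
) -> bool {
    batch.instance_range.end == instance_index
        && batch.extra_index == PhaseItemExtraIndex::maybe_dynamic_offset(dynamic_offset)
}
```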
if !batch_set.last().is_some_and(|batch| { batch.instance_range.end == instance.index - && batch.dynamic_offset == instance.dynamic_offset + && batch.extra_index + == PhaseItemExtraIndex::maybe_dynamic_offset(instance.dynamic_offset) }) { batch_set.push(BinnedRenderPhaseBatch { representative_entity: entity, instance_range: instance.index..instance.index, - dynamic_offset: instance.dynamic_offset, + extra_index: PhaseItemExtraIndex::maybe_dynamic_offset( + instance.dynamic_offset, + ), }); } @@ -145,8 +150,8 @@ pub fn batch_and_prepare_binned_render_phase( else { continue; }; - let instance = buffer.push(buffer_data); - unbatchables.buffer_indices.add(instance); + let instance = gpu_array_buffer.push(buffer_data); + unbatchables.buffer_indices.add(instance.into()); } } } diff --git a/crates/bevy_render/src/camera/camera.rs b/crates/bevy_render/src/camera/camera.rs index 215aa2bb3911c..4d0f334f01d49 100644 --- a/crates/bevy_render/src/camera/camera.rs +++ b/crates/bevy_render/src/camera/camera.rs @@ -1,4 +1,5 @@ use crate::{ + batching::gpu_preprocessing::GpuPreprocessingSupport, camera::{CameraProjection, ManualTextureViewHandle, ManualTextureViews}, prelude::Image, primitives::Frustum, @@ -6,7 +7,9 @@ use crate::{ render_graph::{InternedRenderSubGraph, RenderSubGraph}, render_resource::TextureView, texture::GpuImage, - view::{ColorGrading, ExtractedView, ExtractedWindows, RenderLayers, VisibleEntities}, + view::{ + ColorGrading, ExtractedView, ExtractedWindows, GpuCulling, RenderLayers, VisibleEntities, + }, Extract, }; use bevy_asset::{AssetEvent, AssetId, Assets, Handle}; @@ -17,6 +20,7 @@ use bevy_ecs::{ entity::Entity, event::EventReader, prelude::With, + query::Has, reflect::ReflectComponent, system::{Commands, Query, Res, ResMut, Resource}, }; @@ -24,7 +28,7 @@ use bevy_math::{vec2, Dir3, Mat4, Ray3d, Rect, URect, UVec2, UVec4, Vec2, Vec3}; use bevy_reflect::prelude::*; use bevy_render_macros::ExtractComponent; use bevy_transform::components::GlobalTransform; -use bevy_utils::tracing::warn; +use bevy_utils::{tracing::warn, warn_once}; use bevy_utils::{HashMap, HashSet}; use bevy_window::{ NormalizedWindowRef, PrimaryWindow, Window, WindowCreated, WindowRef, WindowResized, @@ -827,9 +831,11 @@ pub fn extract_cameras( Option<&TemporalJitter>, Option<&RenderLayers>, Option<&Projection>, + Has, )>, >, primary_window: Extract>>, + gpu_preprocessing_support: Res, ) { let primary_window = primary_window.iter().next(); for ( @@ -844,6 +850,7 @@ pub fn extract_cameras( temporal_jitter, render_layers, projection, + gpu_culling, ) in query.iter() { let color_grading = *color_grading.unwrap_or(&ColorGrading::default()); @@ -915,6 +922,16 @@ pub fn extract_cameras( if let Some(perspective) = projection { commands.insert(perspective.clone()); } + + if gpu_culling { + if *gpu_preprocessing_support == GpuPreprocessingSupport::Culling { + commands.insert(GpuCulling); + } else { + warn_once!( + "GPU culling isn't supported on this platform; ignoring `GpuCulling`." 
+ ); + } + } } } } diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs index 1ba6d9cbe371e..60d8ef648aa77 100644 --- a/crates/bevy_render/src/lib.rs +++ b/crates/bevy_render/src/lib.rs @@ -52,6 +52,7 @@ pub mod prelude { }; } +use batching::gpu_preprocessing::BatchingPlugin; use bevy_ecs::schedule::ScheduleBuildSettings; use bevy_utils::prelude::default; pub use extract_param::Extract; @@ -334,6 +335,7 @@ impl Plugin for RenderPlugin { MeshPlugin, GlobalsPlugin, MorphPlugin, + BatchingPlugin, )); app.init_resource::() diff --git a/crates/bevy_render/src/maths.wgsl b/crates/bevy_render/src/maths.wgsl index 4070a8679a5b8..720e6bac46a08 100644 --- a/crates/bevy_render/src/maths.wgsl +++ b/crates/bevy_render/src/maths.wgsl @@ -52,6 +52,11 @@ fn inverse_affine3(affine: mat4x3) -> mat4x3 { return mat4x3(inv_matrix3[0], inv_matrix3[1], inv_matrix3[2], -(inv_matrix3 * affine[3])); } +// Extracts the upper 3x3 portion of a 4x4 matrix. +fn mat4x4_to_mat3x3(m: mat4x4) -> mat3x3 { + return mat3x3(m[0].xyz, m[1].xyz, m[2].xyz); +} + // Creates an orthonormal basis given a Z vector and an up vector (which becomes // Y after orthonormalization). // @@ -64,3 +69,16 @@ fn orthonormalize(z_unnormalized: vec3, up: vec3) -> mat3x3 { let y_basis = cross(z_basis, x_basis); return mat3x3(x_basis, y_basis, z_basis); } + +// Returns true if any part of a sphere is on the positive side of a plane. +// +// `sphere_center.w` should be 1.0. +// +// This is used for frustum culling. +fn sphere_intersects_plane_half_space( + plane: vec4, + sphere_center: vec4, + sphere_radius: f32 +) -> bool { + return dot(plane, sphere_center) + sphere_radius > 0.0; +} diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs index 58e1fa550f4e2..a1125fa938e2c 100644 --- a/crates/bevy_render/src/render_phase/mod.rs +++ b/crates/bevy_render/src/render_phase/mod.rs @@ -51,7 +51,14 @@ use bevy_ecs::{ system::{lifetimeless::SRes, SystemParamItem}, }; use smallvec::SmallVec; -use std::{hash::Hash, marker::PhantomData, ops::Range, slice::SliceIndex}; +use std::{ + fmt::{self, Debug, Formatter}, + hash::Hash, + iter, + marker::PhantomData, + ops::Range, + slice::SliceIndex, +}; /// A collection of all rendering instructions, that will be executed by the GPU, for a /// single render phase for a single view. @@ -124,7 +131,7 @@ pub struct BinnedRenderPhaseBatch { /// /// Note that dynamic offsets are only used on platforms that don't support /// storage buffers. - pub dynamic_offset: Option, + pub extra_index: PhaseItemExtraIndex, } /// Information about the unbatchable entities in a bin. @@ -133,7 +140,7 @@ pub(crate) struct UnbatchableBinnedEntities { pub(crate) entities: Vec, /// The GPU array buffer indices of each unbatchable binned entity. - pub(crate) buffer_indices: UnbatchableBinnedEntityBufferIndex, + pub(crate) buffer_indices: UnbatchableBinnedEntityIndexSet, } /// Stores instance indices and dynamic offsets for unbatchable entities in a @@ -145,7 +152,7 @@ pub(crate) struct UnbatchableBinnedEntities { /// platforms that aren't WebGL 2. #[derive(Default)] -pub(crate) enum UnbatchableBinnedEntityBufferIndex { +pub(crate) enum UnbatchableBinnedEntityIndexSet { /// There are no unbatchable entities in this bin (yet). #[default] NoEntities, @@ -155,26 +162,42 @@ pub(crate) enum UnbatchableBinnedEntityBufferIndex { /// /// This is the typical case on platforms other than WebGL 2. We special /// case this to avoid allocation on those platforms. 
- NoDynamicOffsets { + Sparse { /// The range of indices. instance_range: Range, + /// The index of the first indirect instance parameters. + /// + /// The other indices immediately follow these. + first_indirect_parameters_index: Option, }, /// Dynamic uniforms are present for unbatchable entities in this bin. /// /// We fall back to this on WebGL 2. - DynamicOffsets(Vec), + Dense(Vec), } /// The instance index and dynamic offset (if present) for an unbatchable entity. /// /// This is only useful on platforms that don't support storage buffers. #[derive(Clone, Copy)] -pub(crate) struct UnbatchableBinnedEntityDynamicOffset { +pub(crate) struct UnbatchableBinnedEntityIndices { /// The instance index. - instance_index: u32, - /// The dynamic offset, if present. - dynamic_offset: Option, + pub(crate) instance_index: u32, + /// The [`PhaseItemExtraIndex`], if present. + pub(crate) extra_index: PhaseItemExtraIndex, +} + +impl From> for UnbatchableBinnedEntityIndices +where + T: Clone + ShaderSize + WriteInto, +{ + fn from(value: GpuArrayBufferIndex) -> Self { + UnbatchableBinnedEntityIndices { + instance_index: value.index, + extra_index: PhaseItemExtraIndex::maybe_dynamic_offset(value.dynamic_offset), + } + } } impl BinnedRenderPhase @@ -227,7 +250,7 @@ where key.clone(), batch.representative_entity, batch.instance_range.clone(), - batch.dynamic_offset, + batch.extra_index, ); // Fetch the draw function. @@ -246,17 +269,26 @@ where let unbatchable_entities = &self.unbatchable_values[key]; for (entity_index, &entity) in unbatchable_entities.entities.iter().enumerate() { let unbatchable_dynamic_offset = match &unbatchable_entities.buffer_indices { - UnbatchableBinnedEntityBufferIndex::NoEntities => { + UnbatchableBinnedEntityIndexSet::NoEntities => { // Shouldn't happen… continue; } - UnbatchableBinnedEntityBufferIndex::NoDynamicOffsets { instance_range } => { - UnbatchableBinnedEntityDynamicOffset { - instance_index: instance_range.start + entity_index as u32, - dynamic_offset: None, - } - } - UnbatchableBinnedEntityBufferIndex::DynamicOffsets(ref dynamic_offsets) => { + UnbatchableBinnedEntityIndexSet::Sparse { + instance_range, + first_indirect_parameters_index, + } => UnbatchableBinnedEntityIndices { + instance_index: instance_range.start + entity_index as u32, + extra_index: match first_indirect_parameters_index { + None => PhaseItemExtraIndex::NONE, + Some(first_indirect_parameters_index) => { + PhaseItemExtraIndex::indirect_parameters_index( + u32::from(*first_indirect_parameters_index) + + entity_index as u32, + ) + } + }, + }, + UnbatchableBinnedEntityIndexSet::Dense(ref dynamic_offsets) => { dynamic_offsets[entity_index] } }; @@ -266,7 +298,7 @@ where entity, unbatchable_dynamic_offset.instance_index ..(unbatchable_dynamic_offset.instance_index + 1), - unbatchable_dynamic_offset.dynamic_offset, + unbatchable_dynamic_offset.extra_index, ); // Fetch the draw function. @@ -300,6 +332,42 @@ where } } +impl UnbatchableBinnedEntityIndexSet { + /// Returns the [`UnbatchableBinnedEntityIndices`] for the given entity. + fn indices_for_entity_index( + &self, + entity_index: u32, + ) -> Option { + match self { + UnbatchableBinnedEntityIndexSet::NoEntities => None, + UnbatchableBinnedEntityIndexSet::Sparse { instance_range, .. 
} + if entity_index >= instance_range.len() as u32 => + { + None + } + UnbatchableBinnedEntityIndexSet::Sparse { + instance_range, + first_indirect_parameters_index: None, + } => Some(UnbatchableBinnedEntityIndices { + instance_index: instance_range.start + entity_index, + extra_index: PhaseItemExtraIndex::NONE, + }), + UnbatchableBinnedEntityIndexSet::Sparse { + instance_range, + first_indirect_parameters_index: Some(first_indirect_parameters_index), + } => Some(UnbatchableBinnedEntityIndices { + instance_index: instance_range.start + entity_index, + extra_index: PhaseItemExtraIndex::indirect_parameters_index( + u32::from(*first_indirect_parameters_index) + entity_index, + ), + }), + UnbatchableBinnedEntityIndexSet::Dense(ref indices) => { + indices.get(entity_index as usize).copied() + } + } + } +} + /// A convenient abstraction for adding all the systems necessary for a binned /// render phase to the render app. /// @@ -395,74 +463,62 @@ where } } -impl UnbatchableBinnedEntityBufferIndex { +impl UnbatchableBinnedEntityIndexSet { /// Adds a new entity to the list of unbatchable binned entities. - pub fn add(&mut self, gpu_array_buffer_index: GpuArrayBufferIndex) - where - T: ShaderSize + WriteInto + Clone, - { - match (&mut *self, gpu_array_buffer_index.dynamic_offset) { - (UnbatchableBinnedEntityBufferIndex::NoEntities, None) => { - // This is the first entity we've seen, and we're not on WebGL - // 2. Initialize the fast path. - *self = UnbatchableBinnedEntityBufferIndex::NoDynamicOffsets { - instance_range: gpu_array_buffer_index.index - ..(gpu_array_buffer_index.index + 1), + pub fn add(&mut self, indices: UnbatchableBinnedEntityIndices) { + match self { + UnbatchableBinnedEntityIndexSet::NoEntities => { + if indices.extra_index.is_dynamic_offset() { + // This is the first entity we've seen, and we don't have + // compute shaders. Initialize an array. + *self = UnbatchableBinnedEntityIndexSet::Dense(vec![indices]); + } else { + // This is the first entity we've seen, and we have compute + // shaders. Initialize the fast path. + *self = UnbatchableBinnedEntityIndexSet::Sparse { + instance_range: indices.instance_index..indices.instance_index + 1, + first_indirect_parameters_index: indices + .extra_index + .as_indirect_parameters_index() + .and_then(|index| NonMaxU32::try_from(index).ok()), + } } } - (UnbatchableBinnedEntityBufferIndex::NoEntities, Some(dynamic_offset)) => { - // This is the first entity we've seen, and we're on WebGL 2. - // Initialize an array. - *self = UnbatchableBinnedEntityBufferIndex::DynamicOffsets(vec![ - UnbatchableBinnedEntityDynamicOffset { - instance_index: gpu_array_buffer_index.index, - dynamic_offset: Some(dynamic_offset), - }, - ]); - } - - ( - UnbatchableBinnedEntityBufferIndex::NoDynamicOffsets { - ref mut instance_range, - }, - None, - ) if instance_range.end == gpu_array_buffer_index.index => { + UnbatchableBinnedEntityIndexSet::Sparse { + ref mut instance_range, + first_indirect_parameters_index, + } if instance_range.end == indices.instance_index + && ((first_indirect_parameters_index.is_none() + && indices.extra_index == PhaseItemExtraIndex::NONE) + || first_indirect_parameters_index.is_some_and( + |first_indirect_parameters_index| { + Some( + u32::from(first_indirect_parameters_index) + instance_range.end + - instance_range.start, + ) == indices.extra_index.as_indirect_parameters_index() + }, + )) => + { // This is the normal case on non-WebGL 2. 
instance_range.end += 1; } - ( - UnbatchableBinnedEntityBufferIndex::DynamicOffsets(ref mut offsets), - dynamic_offset, - ) => { - // This is the normal case on WebGL 2. - offsets.push(UnbatchableBinnedEntityDynamicOffset { - instance_index: gpu_array_buffer_index.index, - dynamic_offset, - }); - } - - ( - UnbatchableBinnedEntityBufferIndex::NoDynamicOffsets { instance_range }, - dynamic_offset, - ) => { + UnbatchableBinnedEntityIndexSet::Sparse { instance_range, .. } => { // We thought we were in non-WebGL 2 mode, but we got a dynamic // offset or non-contiguous index anyway. This shouldn't happen, // but let's go ahead and do the sensible thing anyhow: demote // the compressed `NoDynamicOffsets` field to the full // `DynamicOffsets` array. - let mut new_dynamic_offsets: Vec<_> = instance_range - .map(|instance_index| UnbatchableBinnedEntityDynamicOffset { - instance_index, - dynamic_offset: None, - }) + let new_dynamic_offsets = (0..instance_range.len() as u32) + .flat_map(|entity_index| self.indices_for_entity_index(entity_index)) + .chain(iter::once(indices)) .collect(); - new_dynamic_offsets.push(UnbatchableBinnedEntityDynamicOffset { - instance_index: gpu_array_buffer_index.index, - dynamic_offset, - }); - *self = UnbatchableBinnedEntityBufferIndex::DynamicOffsets(new_dynamic_offsets); + *self = UnbatchableBinnedEntityIndexSet::Dense(new_dynamic_offsets); + } + + UnbatchableBinnedEntityIndexSet::Dense(ref mut dense_indices) => { + dense_indices.push(indices); } } } @@ -487,6 +543,7 @@ pub struct SortedRenderPhase where I: SortedPhaseItem, { + /// The items within this [`SortedRenderPhase`]. pub items: Vec, } @@ -604,8 +661,144 @@ pub trait PhaseItem: Sized + Send + Sync + 'static { fn batch_range(&self) -> &Range; fn batch_range_mut(&mut self) -> &mut Range; - fn dynamic_offset(&self) -> Option; - fn dynamic_offset_mut(&mut self) -> &mut Option; + /// Returns the [`PhaseItemExtraIndex`]. + /// + /// If present, this is either a dynamic offset or an indirect parameters + /// index. + fn extra_index(&self) -> PhaseItemExtraIndex; + + /// Returns a pair of mutable references to both the batch range and extra + /// index. + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex); +} + +/// The "extra index" associated with some [`PhaseItem`]s, alongside the +/// indirect instance index. +/// +/// Sometimes phase items require another index in addition to the range of +/// instances they already have. These can be: +/// +/// * The *dynamic offset*: a `wgpu` dynamic offset into the uniform buffer of +/// instance data. This is used on platforms that don't support storage +/// buffers, to work around uniform buffer size limitations. +/// +/// * The *indirect parameters index*: an index into the buffer that specifies +/// the indirect parameters for this [`PhaseItem`]'s drawcall. This is used when +/// indirect mode is on (as used for GPU culling). +/// +/// Note that our indirect draw functionality requires storage buffers, so it's +/// impossible to have both a dynamic offset and an indirect parameters index. +/// This convenient fact allows us to pack both indices into a single `u32`. 
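Because the packing scheme above underpins everything that follows, a quick illustration with made-up values (the equalities follow from the constants defined just below):

```rust
fn extra_index_packing_examples() {
    // Flag bit clear: the low 31 bits are a dynamic offset.
    assert_eq!(PhaseItemExtraIndex::dynamic_offset(256).0, 256);
    // Flag bit set: the low 31 bits are an indirect parameters index.
    assert_eq!(
        PhaseItemExtraIndex::indirect_parameters_index(7).0,
        (1 << 31) | 7
    );
    // All bits set is reserved to mean "no extra index at all".
    assert_eq!(PhaseItemExtraIndex::NONE.0, u32::MAX);
}
```

The same packing is what keeps the `Sparse` fast path above allocation-free: contiguous instance indices with contiguous indirect parameters indices collapse into one range. A worked example with hypothetical values (the index-set types are crate-private, so this is illustrative only):

```rust
fn sparse_fast_path_example() {
    let mut set = UnbatchableBinnedEntityIndexSet::NoEntities;
    for k in 0..3 {
        set.add(UnbatchableBinnedEntityIndices {
            instance_index: 10 + k,
            extra_index: PhaseItemExtraIndex::indirect_parameters_index(5 + k),
        });
    }
    // `set` is now `Sparse { instance_range: 10..13,
    // first_indirect_parameters_index: Some(5) }`: one range, no `Vec`.
}
```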
+#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct PhaseItemExtraIndex(pub u32); + +impl Debug for PhaseItemExtraIndex { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if self.is_dynamic_offset() { + write!(f, "DynamicOffset({})", self.offset()) + } else if self.is_indirect_parameters_index() { + write!(f, "IndirectParametersIndex({})", self.offset()) + } else { + write!(f, "None") + } + } +} + +impl PhaseItemExtraIndex { + /// The flag that indicates that this index is an indirect parameters + /// index. If not set, this is a dynamic offset. + pub const INDIRECT_PARAMETER_INDEX: u32 = 1 << 31; + /// To extract the index from a packed [`PhaseItemExtraIndex`], bitwise-and + /// the contents with this value. + pub const OFFSET_MASK: u32 = Self::INDIRECT_PARAMETER_INDEX - 1; + /// To extract the flag from a packed [`PhaseItemExtraIndex`], bitwise-and + /// the contents with this value. + pub const FLAGS_MASK: u32 = !Self::OFFSET_MASK; + + /// The special value that indicates that no extra index is present. + pub const NONE: PhaseItemExtraIndex = PhaseItemExtraIndex(u32::MAX); + + /// Returns either the indirect parameters index or the dynamic offset, + /// depending on which is in use. + #[inline] + fn offset(&self) -> u32 { + self.0 & Self::OFFSET_MASK + } + + /// Determines whether this extra index is a dynamic offset. + #[inline] + fn is_dynamic_offset(&self) -> bool { + *self != Self::NONE && (self.0 & Self::INDIRECT_PARAMETER_INDEX) == 0 + } + + /// Determines whether this extra index is an indirect parameters index. + #[inline] + fn is_indirect_parameters_index(&self) -> bool { + *self != Self::NONE && (self.0 & Self::INDIRECT_PARAMETER_INDEX) != 0 + } + + /// Packs an indirect parameters index into this extra index. + #[inline] + pub fn indirect_parameters_index(indirect_parameter_index: u32) -> PhaseItemExtraIndex { + // Make sure we didn't overflow. + debug_assert_eq!(indirect_parameter_index & Self::FLAGS_MASK, 0); + PhaseItemExtraIndex(indirect_parameter_index | Self::INDIRECT_PARAMETER_INDEX) + } + + /// Returns either an indirect parameters index or + /// [`PhaseItemExtraIndex::NONE`], as appropriate. + #[inline] + pub fn maybe_indirect_parameters_index( + maybe_indirect_parameters_index: Option<NonMaxU32>, + ) -> PhaseItemExtraIndex { + match maybe_indirect_parameters_index { + Some(indirect_parameters_index) => { + Self::indirect_parameters_index(indirect_parameters_index.into()) + } + None => PhaseItemExtraIndex::NONE, + } + } + + /// Packs a dynamic offset into this extra index. + #[inline] + pub fn dynamic_offset(dynamic_offset: u32) -> PhaseItemExtraIndex { + // Make sure we didn't overflow. + debug_assert_eq!(dynamic_offset & Self::FLAGS_MASK, 0); + + PhaseItemExtraIndex(dynamic_offset) + } + + /// Returns either a dynamic offset or [`PhaseItemExtraIndex::NONE`], as + /// appropriate. + #[inline] + pub fn maybe_dynamic_offset(maybe_dynamic_offset: Option<NonMaxU32>) -> PhaseItemExtraIndex { + match maybe_dynamic_offset { + Some(dynamic_offset) => Self::dynamic_offset(dynamic_offset.into()), + None => PhaseItemExtraIndex::NONE, + } + } + + /// If this extra index describes a dynamic offset, returns it; otherwise, + /// returns `None`. + #[inline] + pub fn as_dynamic_offset(&self) -> Option<NonMaxU32> { + if self.is_dynamic_offset() { + NonMaxU32::try_from(self.0 & Self::OFFSET_MASK).ok() + } else { + None + } + } + + /// If this extra index describes an indirect parameters index, returns it; + /// otherwise, returns `None`.
+ #[inline] + pub fn as_indirect_parameters_index(&self) -> Option { + if self.is_indirect_parameters_index() { + Some(self.0 & Self::OFFSET_MASK) + } else { + None + } + } } /// Represents phase items that are placed into bins. The `BinKey` specifies @@ -633,7 +826,7 @@ pub trait BinnedPhaseItem: PhaseItem { key: Self::BinKey, representative_entity: Entity, batch_range: Range, - dynamic_offset: Option, + extra_index: PhaseItemExtraIndex, ) -> Self; } diff --git a/crates/bevy_render/src/render_resource/buffer_vec.rs b/crates/bevy_render/src/render_resource/buffer_vec.rs index 8a0f77daafb3d..7ff60c1f44078 100644 --- a/crates/bevy_render/src/render_resource/buffer_vec.rs +++ b/crates/bevy_render/src/render_resource/buffer_vec.rs @@ -228,6 +228,11 @@ where self.len = 0; } + /// Returns the length of the buffer. + pub fn len(&self) -> usize { + self.len + } + /// Materializes the buffer on the GPU with space for `capacity` elements. /// /// If the buffer is already big enough, this function doesn't reallocate diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs index 1fef36c5844a0..bdeff34b0d407 100644 --- a/crates/bevy_render/src/view/mod.rs +++ b/crates/bevy_render/src/view/mod.rs @@ -208,6 +208,12 @@ pub struct PostProcessWrite<'a> { pub destination: &'a TextureView, } +#[derive(Component)] +pub struct GpuCulling; + +#[derive(Component)] +pub struct NoCpuCulling; + impl ViewTarget { pub const TEXTURE_FORMAT_HDR: TextureFormat = TextureFormat::Rgba16Float; diff --git a/crates/bevy_render/src/view/visibility/mod.rs b/crates/bevy_render/src/view/visibility/mod.rs index 72ba3c16a8681..f10d47b710ac7 100644 --- a/crates/bevy_render/src/view/visibility/mod.rs +++ b/crates/bevy_render/src/view/visibility/mod.rs @@ -20,6 +20,8 @@ use crate::{ primitives::{Aabb, Frustum, Sphere}, }; +use super::NoCpuCulling; + /// User indication of whether an entity is visible. Propagates down the entity hierarchy. 
/// /// If an entity is hidden in this way, all [`Children`] (and all of their children and so on) who @@ -397,6 +399,7 @@ pub fn check_visibility( &Frustum, Option<&RenderLayers>, &Camera, + Has, )>, mut visible_aabb_query: Query< ( @@ -413,7 +416,8 @@ pub fn check_visibility( ) where QF: QueryFilter + 'static, { - for (mut visible_entities, frustum, maybe_view_mask, camera) in &mut view_query { + for (mut visible_entities, frustum, maybe_view_mask, camera, no_cpu_culling) in &mut view_query + { if !camera.is_active { continue; } @@ -445,7 +449,7 @@ pub fn check_visibility( } // If we have an aabb, do frustum culling - if !no_frustum_culling { + if !no_frustum_culling && !no_cpu_culling { if let Some(model_aabb) = maybe_model_aabb { let model = transform.affine(); let model_sphere = Sphere { diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index 682d2296817ff..2805840a67694 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -17,8 +17,8 @@ use bevy_render::{ prepare_assets, PrepareAssetError, RenderAsset, RenderAssetPlugin, RenderAssets, }, render_phase::{ - AddRenderCommand, DrawFunctions, PhaseItem, RenderCommand, RenderCommandResult, - SetItemPipeline, SortedRenderPhase, TrackedRenderPass, + AddRenderCommand, DrawFunctions, PhaseItem, PhaseItemExtraIndex, RenderCommand, + RenderCommandResult, SetItemPipeline, SortedRenderPhase, TrackedRenderPass, }, render_resource::{ AsBindGroup, AsBindGroupError, BindGroup, BindGroupId, BindGroupLayout, @@ -451,7 +451,7 @@ pub fn queue_material2d_meshes( sort_key: FloatOrd(mesh_z + material2d.depth_bias), // Batching is done in batch_and_prepare_render_phase batch_range: 0..1, - dynamic_offset: None, + extra_index: PhaseItemExtraIndex::NONE, }); } } diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index 90a67f7517179..0dec7f0cbdac5 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -646,7 +646,7 @@ impl RenderCommand

for SetMesh2dBindGroup { ) -> RenderCommandResult { let mut dynamic_offsets: [u32; 1] = Default::default(); let mut offset_count = 0; - if let Some(dynamic_offset) = item.dynamic_offset() { + if let Some(dynamic_offset) = item.extra_index().as_dynamic_offset() { dynamic_offsets[offset_count] = dynamic_offset.get(); offset_count += 1; } diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs index ff0a475af815c..f0f2b524a32a4 100644 --- a/crates/bevy_sprite/src/render/mod.rs +++ b/crates/bevy_sprite/src/render/mod.rs @@ -19,8 +19,8 @@ use bevy_math::{Affine3A, FloatOrd, Quat, Rect, Vec2, Vec4}; use bevy_render::{ render_asset::RenderAssets, render_phase::{ - DrawFunctions, PhaseItem, RenderCommand, RenderCommandResult, SetItemPipeline, - SortedRenderPhase, TrackedRenderPass, + DrawFunctions, PhaseItem, PhaseItemExtraIndex, RenderCommand, RenderCommandResult, + SetItemPipeline, SortedRenderPhase, TrackedRenderPass, }, render_resource::{ binding_types::{sampler, texture_2d, uniform_buffer}, @@ -516,7 +516,7 @@ pub fn queue_sprites( sort_key, // batch_range and dynamic_offset will be calculated in prepare_sprites batch_range: 0..0, - dynamic_offset: None, + extra_index: PhaseItemExtraIndex::NONE, }); } } diff --git a/crates/bevy_ui/src/render/mod.rs b/crates/bevy_ui/src/render/mod.rs index d4c27a2141b33..881e6918fd88f 100644 --- a/crates/bevy_ui/src/render/mod.rs +++ b/crates/bevy_ui/src/render/mod.rs @@ -7,8 +7,12 @@ use bevy_core_pipeline::core_2d::graph::{Core2d, Node2d}; use bevy_core_pipeline::core_3d::graph::{Core3d, Node3d}; use bevy_core_pipeline::{core_2d::Camera2d, core_3d::Camera3d}; use bevy_hierarchy::Parent; -use bevy_render::texture::GpuImage; -use bevy_render::{render_phase::PhaseItem, view::ViewVisibility, ExtractSchedule, Render}; +use bevy_render::{ + render_phase::{PhaseItem, PhaseItemExtraIndex}, + texture::GpuImage, + view::ViewVisibility, + ExtractSchedule, Render, +}; use bevy_sprite::{SpriteAssetEvents, TextureAtlas}; pub use pipeline::*; pub use render_pass::*; @@ -901,7 +905,7 @@ pub fn queue_uinodes( ), // batch_range will be calculated in prepare_uinodes batch_range: 0..0, - dynamic_offset: None, + extra_index: PhaseItemExtraIndex::NONE, }); } } diff --git a/crates/bevy_ui/src/render/render_pass.rs b/crates/bevy_ui/src/render/render_pass.rs index e398a46d93d24..d403a44bedc7f 100644 --- a/crates/bevy_ui/src/render/render_pass.rs +++ b/crates/bevy_ui/src/render/render_pass.rs @@ -15,7 +15,6 @@ use bevy_render::{ renderer::*, view::*, }; -use nonmax::NonMaxU32; pub struct UiPassNode { ui_view_query: QueryState< @@ -92,7 +91,7 @@ pub struct TransparentUi { pub pipeline: CachedRenderPipelineId, pub draw_function: DrawFunctionId, pub batch_range: Range, - pub dynamic_offset: Option, + pub extra_index: PhaseItemExtraIndex, } impl PhaseItem for TransparentUi { @@ -117,13 +116,13 @@ impl PhaseItem for TransparentUi { } #[inline] - fn dynamic_offset(&self) -> Option { - self.dynamic_offset + fn extra_index(&self) -> PhaseItemExtraIndex { + self.extra_index } #[inline] - fn dynamic_offset_mut(&mut self) -> &mut Option { - &mut self.dynamic_offset + fn batch_range_and_extra_index_mut(&mut self) -> (&mut Range, &mut PhaseItemExtraIndex) { + (&mut self.batch_range, &mut self.extra_index) } } diff --git a/crates/bevy_ui/src/render/ui_material_pipeline.rs b/crates/bevy_ui/src/render/ui_material_pipeline.rs index b4d13ac65ac13..b1beaaa5d11c6 100644 --- a/crates/bevy_ui/src/render/ui_material_pipeline.rs +++ 
b/crates/bevy_ui/src/render/ui_material_pipeline.rs @@ -671,7 +671,7 @@ pub fn queue_ui_material_nodes( entity.index(), ), batch_range: 0..0, - dynamic_offset: None, + extra_index: PhaseItemExtraIndex::NONE, }); } } diff --git a/examples/2d/mesh2d_manual.rs b/examples/2d/mesh2d_manual.rs index 9be4c11f8067a..343db4be1d153 100644 --- a/examples/2d/mesh2d_manual.rs +++ b/examples/2d/mesh2d_manual.rs @@ -13,7 +13,10 @@ use bevy::{ render::{ mesh::{GpuMesh, Indices, MeshVertexAttribute}, render_asset::{RenderAssetUsages, RenderAssets}, - render_phase::{AddRenderCommand, DrawFunctions, SetItemPipeline, SortedRenderPhase}, + render_phase::{ + AddRenderCommand, DrawFunctions, PhaseItemExtraIndex, SetItemPipeline, + SortedRenderPhase, + }, render_resource::{ BlendState, ColorTargetState, ColorWrites, Face, FragmentState, FrontFace, MultisampleState, PipelineCache, PolygonMode, PrimitiveState, PrimitiveTopology, @@ -392,7 +395,7 @@ pub fn queue_colored_mesh2d( sort_key: FloatOrd(mesh_z), // This material is not batched batch_range: 0..1, - dynamic_offset: None, + extra_index: PhaseItemExtraIndex::NONE, }); } } diff --git a/examples/3d/deferred_rendering.rs b/examples/3d/deferred_rendering.rs index 165d1de9098fc..6ee41a335a60b 100644 --- a/examples/3d/deferred_rendering.rs +++ b/examples/3d/deferred_rendering.rs @@ -7,9 +7,10 @@ use bevy::{ fxaa::Fxaa, prepass::{DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass}, }, - pbr::NotShadowReceiver, - pbr::{CascadeShadowConfigBuilder, DirectionalLightShadowMap}, - pbr::{DefaultOpaqueRendererMethod, NotShadowCaster, OpaqueRendererMethod}, + pbr::{ + CascadeShadowConfigBuilder, DefaultOpaqueRendererMethod, DirectionalLightShadowMap, + NotShadowCaster, NotShadowReceiver, OpaqueRendererMethod, + }, prelude::*, render::render_resource::TextureFormat, }; diff --git a/examples/shader/shader_instancing.rs b/examples/shader/shader_instancing.rs index b1c91c8973b07..17fd7823c95ad 100644 --- a/examples/shader/shader_instancing.rs +++ b/examples/shader/shader_instancing.rs @@ -15,8 +15,8 @@ use bevy::{ mesh::{GpuBufferInfo, GpuMesh, MeshVertexBufferLayoutRef}, render_asset::RenderAssets, render_phase::{ - AddRenderCommand, DrawFunctions, PhaseItem, RenderCommand, RenderCommandResult, - SetItemPipeline, SortedRenderPhase, TrackedRenderPass, + AddRenderCommand, DrawFunctions, PhaseItem, PhaseItemExtraIndex, RenderCommand, + RenderCommandResult, SetItemPipeline, SortedRenderPhase, TrackedRenderPass, }, render_resource::*, renderer::RenderDevice, @@ -144,7 +144,7 @@ fn queue_custom( draw_function: draw_custom, distance: rangefinder.distance_translation(&mesh_instance.translation), batch_range: 0..1, - dynamic_offset: None, + extra_index: PhaseItemExtraIndex::NONE, }); } } diff --git a/examples/stress_tests/many_cubes.rs b/examples/stress_tests/many_cubes.rs index 8d583d0535e6a..a10ced7d0f74b 100644 --- a/examples/stress_tests/many_cubes.rs +++ b/examples/stress_tests/many_cubes.rs @@ -20,7 +20,7 @@ use bevy::{ batching::NoAutomaticBatching, render_asset::RenderAssetUsages, render_resource::{Extent3d, TextureDimension, TextureFormat}, - view::NoFrustumCulling, + view::{GpuCulling, NoCpuCulling, NoFrustumCulling}, }, window::{PresentMode, WindowResolution}, winit::{UpdateMode, WinitSettings}, @@ -51,7 +51,7 @@ struct Args { #[argh(option, default = "1")] mesh_count: usize, - /// whether to disable frustum culling. Stresses queuing and batching as all mesh material entities in the scene are always drawn. + /// whether to disable all frustum culling. 
Stresses queuing and batching as all mesh material entities in the scene are always drawn. #[argh(switch)] no_frustum_culling: bool, @@ -59,6 +59,14 @@ struct Args { #[argh(switch)] no_automatic_batching: bool, + /// whether to enable GPU culling. + #[argh(switch)] + gpu_culling: bool, + + /// whether to disable CPU culling. + #[argh(switch)] + no_cpu_culling: bool, + /// whether to enable directional light cascaded shadow mapping. #[argh(switch)] shadows: bool, @@ -172,7 +180,14 @@ fn setup( } // camera - commands.spawn(Camera3dBundle::default()); + let mut camera = commands.spawn(Camera3dBundle::default()); + if args.gpu_culling { + camera.insert(GpuCulling); + } + if args.no_cpu_culling { + camera.insert(NoCpuCulling); + } + // Inside-out box around the meshes onto which shadows are cast (though you cannot see them...) commands.spawn(( PbrBundle {
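Putting the new pieces together, here is a minimal sketch of opting a camera into the GPU-driven path, mirroring the `many_cubes` flags above (on unsupported platforms the `GpuCulling` marker is ignored, with the warning added in `extract_cameras`):

```rust
use bevy::prelude::*;
use bevy::render::view::{GpuCulling, NoCpuCulling};

fn setup(mut commands: Commands) {
    // Cull on the GPU, and skip the now-redundant CPU frustum check.
    commands
        .spawn(Camera3dBundle::default())
        .insert((GpuCulling, NoCpuCulling));
}
```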