diff --git a/Cargo.toml b/Cargo.toml index 476c27e0977be..7c9f3ad6aebe0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -321,6 +321,12 @@ embedded_watcher = ["bevy_internal/embedded_watcher"] # Enable stepping-based debugging of Bevy systems bevy_debug_stepping = ["bevy_internal/bevy_debug_stepping"] +# Enables the meshlet renderer for dense high-poly scenes (experimental) +meshlet = ["bevy_internal/meshlet"] + +# Enables processing meshes into meshlet meshes for bevy_pbr +meshlet_processor = ["bevy_internal/meshlet_processor"] + # Enable support for the ios_simulator by downgrading some rendering capabilities ios_simulator = ["bevy_internal/ios_simulator"] @@ -950,6 +956,18 @@ description = "Demonstrates irradiance volumes" category = "3D Rendering" wasm = false +[[example]] +name = "meshlet" +path = "examples/3d/meshlet.rs" +doc-scrape-examples = true +required-features = ["meshlet"] + +[package.metadata.example.meshlet] +name = "Meshlet" +description = "Meshlet rendering for dense high-poly scenes (experimental)" +category = "3D Rendering" +wasm = false + [[example]] name = "lightmaps" path = "examples/3d/lightmaps.rs" diff --git a/assets/models/bunny.meshlet_mesh b/assets/models/bunny.meshlet_mesh new file mode 100644 index 0000000000000..735d002601293 Binary files /dev/null and b/assets/models/bunny.meshlet_mesh differ diff --git a/crates/bevy_core_pipeline/src/lib.rs b/crates/bevy_core_pipeline/src/lib.rs index d6ce9af95dab8..238f46e95a739 100644 --- a/crates/bevy_core_pipeline/src/lib.rs +++ b/crates/bevy_core_pipeline/src/lib.rs @@ -20,6 +20,8 @@ pub mod upscaling; pub use skybox::Skybox; /// Experimental features that are not yet finished. Please report any issues you encounter! +/// +/// Expect bugs, missing features, compatibility issues, low performance, and/or future breaking changes. pub mod experimental { pub mod taa { pub use crate::taa::{ diff --git a/crates/bevy_internal/Cargo.toml b/crates/bevy_internal/Cargo.toml index fc7eb39ff1ae9..ebb255a82aa60 100644 --- a/crates/bevy_internal/Cargo.toml +++ b/crates/bevy_internal/Cargo.toml @@ -158,6 +158,12 @@ bevy_debug_stepping = [ "bevy_app/bevy_debug_stepping", ] +# Enables the meshlet renderer for dense high-poly scenes (experimental) +meshlet = ["bevy_pbr?/meshlet"] + +# Enables processing meshes into meshlet meshes for bevy_pbr +meshlet_processor = ["bevy_pbr?/meshlet_processor"] + # Provides a collection of developer tools bevy_dev_tools = ["dep:bevy_dev_tools"] diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index 0b148fc2729bd..961c0a7ddcca3 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -15,6 +15,10 @@ pbr_transmission_textures = [] shader_format_glsl = ["bevy_render/shader_format_glsl"] trace = ["bevy_render/trace"] ios_simulator = ["bevy_render/ios_simulator"] +# Enables the meshlet renderer for dense high-poly scenes (experimental) +meshlet = [] +# Enables processing meshes into meshlet meshes +meshlet_processor = ["dep:meshopt", "dep:thiserror"] [dependencies] # bevy @@ -34,12 +38,17 @@ bevy_window = { path = "../bevy_window", version = "0.14.0-dev" } bevy_derive = { path = "../bevy_derive", version = "0.14.0-dev" } # other +meshopt = { version = "0.2", optional = true } +thiserror = { version = "1", optional = true } bitflags = "2.3" fixedbitset = "0.5" # direct dependency required for derive macro bytemuck = { version = "1", features = ["derive"] } radsort = "0.1" smallvec = "1.6" +serde = { version = "1", features = ["derive", "rc"] } +bincode = "1" +range-alloc = "0.1" nonmax = "0.5" [lints] diff --git a/crates/bevy_pbr/src/deferred/pbr_deferred_functions.wgsl b/crates/bevy_pbr/src/deferred/pbr_deferred_functions.wgsl index d8d923ede88c2..c02e55a24f8ff 100644 --- a/crates/bevy_pbr/src/deferred/pbr_deferred_functions.wgsl +++ b/crates/bevy_pbr/src/deferred/pbr_deferred_functions.wgsl @@ -7,10 +7,16 @@ rgb9e5, mesh_view_bindings::view, utils::{octahedral_encode, octahedral_decode}, - prepass_io::{VertexOutput, FragmentOutput}, + prepass_io::FragmentOutput, view_transformations::{position_ndc_to_world, frag_coord_to_ndc}, } +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::VertexOutput +#else +#import bevy_pbr::prepass_io::VertexOutput +#endif + #ifdef MOTION_VECTOR_PREPASS #import bevy_pbr::pbr_prepass_functions::calculate_motion_vector #endif @@ -116,7 +122,11 @@ fn deferred_output(in: VertexOutput, pbr_input: PbrInput) -> FragmentOutput { #endif // motion vectors if required #ifdef MOTION_VECTOR_PREPASS +#ifdef MESHLET_MESH_MATERIAL_PASS + out.motion_vector = in.motion_vector; +#else out.motion_vector = calculate_motion_vector(in.world_position, in.previous_world_position); +#endif #endif return out; diff --git a/crates/bevy_pbr/src/extended_material.rs b/crates/bevy_pbr/src/extended_material.rs index 5ad2e1a8eb42b..a5c46ea6ffa8d 100644 --- a/crates/bevy_pbr/src/extended_material.rs +++ b/crates/bevy_pbr/src/extended_material.rs @@ -67,6 +67,30 @@ pub trait MaterialExtension: Asset + AsBindGroup + Clone + Sized { ShaderRef::Default } + /// Returns this material's [`crate::meshlet::MeshletMesh`] fragment shader. If [`ShaderRef::Default`] is returned, + /// the default meshlet mesh fragment shader will be used. + #[allow(unused_variables)] + #[cfg(feature = "meshlet")] + fn meshlet_mesh_fragment_shader() -> ShaderRef { + ShaderRef::Default + } + + /// Returns this material's [`crate::meshlet::MeshletMesh`] prepass fragment shader. If [`ShaderRef::Default`] is returned, + /// the default meshlet mesh prepass fragment shader will be used. + #[allow(unused_variables)] + #[cfg(feature = "meshlet")] + fn meshlet_mesh_prepass_fragment_shader() -> ShaderRef { + ShaderRef::Default + } + + /// Returns this material's [`crate::meshlet::MeshletMesh`] deferred fragment shader. If [`ShaderRef::Default`] is returned, + /// the default meshlet mesh deferred fragment shader will be used. + #[allow(unused_variables)] + #[cfg(feature = "meshlet")] + fn meshlet_mesh_deferred_fragment_shader() -> ShaderRef { + ShaderRef::Default + } + /// Customizes the default [`RenderPipelineDescriptor`] for a specific entity using the entity's /// [`MaterialPipelineKey`] and [`MeshVertexBufferLayoutRef`] as input. /// Specialization for the base material is applied before this function is called. @@ -211,6 +235,30 @@ impl Material for ExtendedMaterial { } } + #[cfg(feature = "meshlet")] + fn meshlet_mesh_fragment_shader() -> ShaderRef { + match E::meshlet_mesh_fragment_shader() { + ShaderRef::Default => B::meshlet_mesh_fragment_shader(), + specified => specified, + } + } + + #[cfg(feature = "meshlet")] + fn meshlet_mesh_prepass_fragment_shader() -> ShaderRef { + match E::meshlet_mesh_prepass_fragment_shader() { + ShaderRef::Default => B::meshlet_mesh_prepass_fragment_shader(), + specified => specified, + } + } + + #[cfg(feature = "meshlet")] + fn meshlet_mesh_deferred_fragment_shader() -> ShaderRef { + match E::meshlet_mesh_deferred_fragment_shader() { + ShaderRef::Default => B::meshlet_mesh_deferred_fragment_shader(), + specified => specified, + } + } + fn specialize( pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 429aa85135e01..341ad0459c4b1 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -2,8 +2,20 @@ #![allow(missing_docs)] #![cfg_attr(docsrs, feature(doc_auto_cfg))] +#[cfg(feature = "meshlet")] +mod meshlet; pub mod wireframe; +/// Experimental features that are not yet finished. Please report any issues you encounter! +/// +/// Expect bugs, missing features, compatibility issues, low performance, and/or future breaking changes. +#[cfg(feature = "meshlet")] +pub mod experimental { + pub mod meshlet { + pub use crate::meshlet::*; + } +} + mod bundle; pub mod deferred; mod extended_material; @@ -107,6 +119,8 @@ pub const PBR_PREPASS_FUNCTIONS_SHADER_HANDLE: Handle = pub const PBR_DEFERRED_TYPES_HANDLE: Handle = Handle::weak_from_u128(3221241127431430599); pub const PBR_DEFERRED_FUNCTIONS_HANDLE: Handle = Handle::weak_from_u128(72019026415438599); pub const RGB9E5_FUNCTIONS_HANDLE: Handle = Handle::weak_from_u128(2659010996143919192); +const MESHLET_VISIBILITY_BUFFER_RESOLVE_SHADER_HANDLE: Handle = + Handle::weak_from_u128(2325134235233421); /// Sets up the entire PBR infrastructure of bevy. pub struct PbrPlugin { @@ -232,6 +246,13 @@ impl Plugin for PbrPlugin { "render/view_transformations.wgsl", Shader::from_wgsl ); + // Setup dummy shaders for when MeshletPlugin is not used to prevent shader import errors. + load_internal_asset!( + app, + MESHLET_VISIBILITY_BUFFER_RESOLVE_SHADER_HANDLE, + "meshlet/dummy_visibility_buffer_resolve.wgsl", + Shader::from_wgsl + ); app.register_asset_reflect::() .register_type::() diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index 96c32bfd6834c..4e42cde7f5696 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -1,3 +1,8 @@ +#[cfg(feature = "meshlet")] +use crate::meshlet::{ + prepare_material_meshlet_meshes_main_opaque_pass, queue_material_meshlet_meshes, + MeshletGpuScene, +}; use crate::*; use bevy_asset::{Asset, AssetEvent, AssetId, AssetServer}; use bevy_core_pipeline::{ @@ -170,6 +175,36 @@ pub trait Material: Asset + AsBindGroup + Clone + Sized { ShaderRef::Default } + /// Returns this material's [`crate::meshlet::MeshletMesh`] fragment shader. If [`ShaderRef::Default`] is returned, + /// the default meshlet mesh fragment shader will be used. + /// + /// This is part of an experimental feature, and is unnecessary to implement unless you are using `MeshletMesh`'s. + #[allow(unused_variables)] + #[cfg(feature = "meshlet")] + fn meshlet_mesh_fragment_shader() -> ShaderRef { + ShaderRef::Default + } + + /// Returns this material's [`crate::meshlet::MeshletMesh`] prepass fragment shader. If [`ShaderRef::Default`] is returned, + /// the default meshlet mesh prepass fragment shader will be used. + /// + /// This is part of an experimental feature, and is unnecessary to implement unless you are using `MeshletMesh`'s. + #[allow(unused_variables)] + #[cfg(feature = "meshlet")] + fn meshlet_mesh_prepass_fragment_shader() -> ShaderRef { + ShaderRef::Default + } + + /// Returns this material's [`crate::meshlet::MeshletMesh`] deferred fragment shader. If [`ShaderRef::Default`] is returned, + /// the default meshlet mesh deferred fragment shader will be used. + /// + /// This is part of an experimental feature, and is unnecessary to implement unless you are using `MeshletMesh`'s. + #[allow(unused_variables)] + #[cfg(feature = "meshlet")] + fn meshlet_mesh_deferred_fragment_shader() -> ShaderRef { + ShaderRef::Default + } + /// Customizes the default [`RenderPipelineDescriptor`] for a specific entity using the entity's /// [`MaterialPipelineKey`] and [`MeshVertexBufferLayoutRef`] as input. #[allow(unused_variables)] @@ -248,6 +283,18 @@ where .after(prepare_materials::),), ); } + + #[cfg(feature = "meshlet")] + render_app.add_systems( + Render, + ( + prepare_material_meshlet_meshes_main_opaque_pass::, + queue_material_meshlet_meshes::, + ) + .chain() + .in_set(RenderSet::Queue) + .run_if(resource_exists::), + ); } if self.shadows_enabled || self.prepass_enabled { diff --git a/crates/bevy_pbr/src/meshlet/asset.rs b/crates/bevy_pbr/src/meshlet/asset.rs new file mode 100644 index 0000000000000..b0c0cd89f19d6 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/asset.rs @@ -0,0 +1,102 @@ +use bevy_asset::{ + io::{Reader, Writer}, + saver::{AssetSaver, SavedAsset}, + Asset, AssetLoader, AsyncReadExt, AsyncWriteExt, LoadContext, +}; +use bevy_math::Vec3; +use bevy_reflect::TypePath; +use bytemuck::{Pod, Zeroable}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +/// A mesh that has been pre-processed into multiple small clusters of triangles called meshlets. +/// +/// A [`bevy_render::mesh::Mesh`] can be converted to a [`MeshletMesh`] using `MeshletMesh::from_mesh` when the `meshlet_processor` cargo feature is enabled. +/// The conversion step is very slow, and is meant to be ran once ahead of time, and not during runtime. This type of mesh is not suitable for +/// dynamically generated geometry. +/// +/// There are restrictions on the [`crate::Material`] functionality that can be used with this type of mesh. +/// * Materials have no control over the vertex shader or vertex attributes. +/// * Materials must be opaque. Transparent, alpha masked, and transmissive materials are not supported. +/// * Materials must use the [`crate::Material::meshlet_mesh_fragment_shader`] method (and similar variants for prepass/deferred shaders) +/// which requires certain shader patterns that differ from the regular material shaders. +/// * Limited control over [`bevy_render::render_resource::RenderPipelineDescriptor`] attributes. +/// +/// See also [`super::MaterialMeshletMeshBundle`] and [`super::MeshletPlugin`]. +#[derive(Asset, TypePath, Serialize, Deserialize, Clone)] +pub struct MeshletMesh { + /// The total amount of triangles summed across all meshlets in the mesh. + pub total_meshlet_triangles: u64, + /// Raw vertex data bytes for the overall mesh. + pub vertex_data: Arc<[u8]>, + /// Indices into `vertex_data`. + pub vertex_ids: Arc<[u32]>, + /// Indices into `vertex_ids`. + pub indices: Arc<[u8]>, + /// The list of meshlets making up this mesh. + pub meshlets: Arc<[Meshlet]>, + /// A list of spherical bounding volumes, 1 per meshlet. + pub meshlet_bounding_spheres: Arc<[MeshletBoundingSphere]>, +} + +/// A single meshlet within a [`MeshletMesh`]. +#[derive(Serialize, Deserialize, Copy, Clone, Pod, Zeroable)] +#[repr(C)] +pub struct Meshlet { + /// The offset within the parent mesh's [`MeshletMesh::vertex_ids`] buffer where the indices for this meshlet begin. + pub start_vertex_id: u32, + /// The offset within the parent mesh's [`MeshletMesh::indices`] buffer where the indices for this meshlet begin. + pub start_index_id: u32, + /// The amount of triangles in this meshlet. + pub triangle_count: u32, +} + +/// A spherical bounding volume used for culling a [`Meshlet`]. +#[derive(Serialize, Deserialize, Copy, Clone, Pod, Zeroable)] +#[repr(C)] +pub struct MeshletBoundingSphere { + pub center: Vec3, + pub radius: f32, +} + +/// An [`AssetLoader`] and [`AssetSaver`] for `.meshlet_mesh` [`MeshletMesh`] assets. +pub struct MeshletMeshSaverLoad; + +impl AssetLoader for MeshletMeshSaverLoad { + type Asset = MeshletMesh; + type Settings = (); + type Error = bincode::Error; + + async fn load<'a>( + &'a self, + reader: &'a mut Reader<'_>, + _settings: &'a Self::Settings, + _load_context: &'a mut LoadContext<'_>, + ) -> Result { + let mut bytes = Vec::new(); + reader.read_to_end(&mut bytes).await?; + bincode::deserialize(&bytes) + } + + fn extensions(&self) -> &[&str] { + &["meshlet_mesh"] + } +} + +impl AssetSaver for MeshletMeshSaverLoad { + type Asset = MeshletMesh; + type Settings = (); + type OutputLoader = Self; + type Error = bincode::Error; + + async fn save<'a>( + &'a self, + writer: &'a mut Writer, + asset: SavedAsset<'a, Self::Asset>, + _settings: &'a Self::Settings, + ) -> Result<(), Self::Error> { + let bytes = bincode::serialize(asset.get())?; + writer.write_all(&bytes).await?; + Ok(()) + } +} diff --git a/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl b/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl new file mode 100644 index 0000000000000..177cbc35a3424 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/copy_material_depth.wgsl @@ -0,0 +1,10 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +@group(0) @binding(0) var material_depth: texture_2d; + +/// This pass copies the R16Uint material depth texture to an actual Depth16Unorm depth texture. + +@fragment +fn copy_material_depth(in: FullscreenVertexOutput) -> @builtin(frag_depth) f32 { + return f32(textureLoad(material_depth, vec2(in.position.xy), 0).r) / 65535.0; +} diff --git a/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl b/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl new file mode 100644 index 0000000000000..015ed6ee11ff3 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/cull_meshlets.wgsl @@ -0,0 +1,118 @@ +#import bevy_pbr::meshlet_bindings::{ + meshlet_thread_meshlet_ids, + meshlet_bounding_spheres, + meshlet_thread_instance_ids, + meshlet_instance_uniforms, + meshlet_occlusion, + view, + should_cull_instance, + get_meshlet_previous_occlusion, +} +#ifdef MESHLET_SECOND_CULLING_PASS +#import bevy_pbr::meshlet_bindings::depth_pyramid +#endif +#import bevy_render::maths::affine3_to_square + +/// Culls individual clusters (1 per thread) in two passes (two pass occlusion culling), and outputs a bitmask of which clusters survived. +/// 1. The first pass is only frustum culling, on only the clusters that were visible last frame. +/// 2. The second pass performs both frustum and occlusion culling (using the depth buffer generated from the first pass), on all clusters. + +@compute +@workgroup_size(128, 1, 1) // 128 threads per workgroup, 1 instanced meshlet per thread +fn cull_meshlets(@builtin(global_invocation_id) cluster_id: vec3) { + // Fetch the instanced meshlet data + if cluster_id.x >= arrayLength(&meshlet_thread_meshlet_ids) { return; } + let instance_id = meshlet_thread_instance_ids[cluster_id.x]; + if should_cull_instance(instance_id) { + return; + } + let meshlet_id = meshlet_thread_meshlet_ids[cluster_id.x]; + let bounding_sphere = meshlet_bounding_spheres[meshlet_id]; + let instance_uniform = meshlet_instance_uniforms[instance_id]; + let model = affine3_to_square(instance_uniform.model); + let model_scale = max(length(model[0]), max(length(model[1]), length(model[2]))); + let bounding_sphere_center = model * vec4(bounding_sphere.center, 1.0); + let bounding_sphere_radius = model_scale * bounding_sphere.radius; + + // In the first pass, operate only on the clusters visible last frame. In the second pass, operate on all clusters. +#ifdef MESHLET_SECOND_CULLING_PASS + var meshlet_visible = true; +#else + var meshlet_visible = get_meshlet_previous_occlusion(cluster_id.x); + if !meshlet_visible { return; } +#endif + + // Frustum culling + // TODO: Faster method from https://vkguide.dev/docs/gpudriven/compute_culling/#frustum-culling-function + for (var i = 0u; i < 6u; i++) { + if !meshlet_visible { break; } + meshlet_visible &= dot(view.frustum[i], bounding_sphere_center) > -bounding_sphere_radius; + } + +#ifdef MESHLET_SECOND_CULLING_PASS + // In the second culling pass, cull against the depth pyramid generated from the first pass + if meshlet_visible { + let bounding_sphere_center_view_space = (view.inverse_view * vec4(bounding_sphere_center.xyz, 1.0)).xyz; + let aabb = project_view_space_sphere_to_screen_space_aabb(bounding_sphere_center_view_space, bounding_sphere_radius); + + // Halve the AABB size because the first depth mip resampling pass cut the full screen resolution into a power of two conservatively + let depth_pyramid_size_mip_0 = vec2(textureDimensions(depth_pyramid, 0)) * 0.5; + let width = (aabb.z - aabb.x) * depth_pyramid_size_mip_0.x; + let height = (aabb.w - aabb.y) * depth_pyramid_size_mip_0.y; + let depth_level = max(0, i32(ceil(log2(max(width, height))))); // TODO: Naga doesn't like this being a u32 + let depth_pyramid_size = vec2(textureDimensions(depth_pyramid, depth_level)); + let aabb_top_left = vec2(aabb.xy * depth_pyramid_size); + + let depth_quad_a = textureLoad(depth_pyramid, aabb_top_left, depth_level).x; + let depth_quad_b = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 0u), depth_level).x; + let depth_quad_c = textureLoad(depth_pyramid, aabb_top_left + vec2(0u, 1u), depth_level).x; + let depth_quad_d = textureLoad(depth_pyramid, aabb_top_left + vec2(1u, 1u), depth_level).x; + + let occluder_depth = min(min(depth_quad_a, depth_quad_b), min(depth_quad_c, depth_quad_d)); + if view.projection[3][3] == 1.0 { + // Orthographic + let sphere_depth = view.projection[3][2] + (bounding_sphere_center_view_space.z + bounding_sphere_radius) * view.projection[2][2]; + meshlet_visible &= sphere_depth >= occluder_depth; + } else { + // Perspective + let sphere_depth = -view.projection[3][2] / (bounding_sphere_center_view_space.z + bounding_sphere_radius); + meshlet_visible &= sphere_depth >= occluder_depth; + } + } +#endif + + // Write the bitmask of whether or not the cluster was culled + let occlusion_bit = u32(meshlet_visible) << (cluster_id.x % 32u); + atomicOr(&meshlet_occlusion[cluster_id.x / 32u], occlusion_bit); +} + +// https://zeux.io/2023/01/12/approximate-projected-bounds +fn project_view_space_sphere_to_screen_space_aabb(cp: vec3, r: f32) -> vec4 { + let inv_width = view.projection[0][0] * 0.5; + let inv_height = view.projection[1][1] * 0.5; + if view.projection[3][3] == 1.0 { + // Orthographic + let min_x = cp.x - r; + let max_x = cp.x + r; + + let min_y = cp.y - r; + let max_y = cp.y + r; + + return vec4(min_x * inv_width, 1.0 - max_y * inv_height, max_x * inv_width, 1.0 - min_y * inv_height); + } else { + // Perspective + let c = vec3(cp.xy, -cp.z); + let cr = c * r; + let czr2 = c.z * c.z - r * r; + + let vx = sqrt(c.x * c.x + czr2); + let min_x = (vx * c.x - cr.z) / (vx * c.z + cr.x); + let max_x = (vx * c.x + cr.z) / (vx * c.z - cr.x); + + let vy = sqrt(c.y * c.y + czr2); + let min_y = (vy * c.y - cr.z) / (vy * c.z + cr.y); + let max_y = (vy * c.y + cr.z) / (vy * c.z - cr.y); + + return vec4(min_x * inv_width, -max_y * inv_height, max_x * inv_width, -min_y * inv_height) + vec4(0.5); + } +} diff --git a/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl new file mode 100644 index 0000000000000..fbb70bf31679f --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl @@ -0,0 +1,16 @@ +#import bevy_core_pipeline::fullscreen_vertex_shader::FullscreenVertexOutput + +@group(0) @binding(0) var input_depth: texture_2d; +@group(0) @binding(1) var samplr: sampler; + +/// Performs a 2x2 downsample on a depth texture to generate the next mip level of a hierarchical depth buffer. + +@fragment +fn downsample_depth(in: FullscreenVertexOutput) -> @location(0) vec4 { + let depth_quad = textureGather(0, input_depth, samplr, in.uv); + let downsampled_depth = min( + min(depth_quad.x, depth_quad.y), + min(depth_quad.z, depth_quad.w), + ); + return vec4(downsampled_depth, 0.0, 0.0, 0.0); +} diff --git a/crates/bevy_pbr/src/meshlet/dummy_visibility_buffer_resolve.wgsl b/crates/bevy_pbr/src/meshlet/dummy_visibility_buffer_resolve.wgsl new file mode 100644 index 0000000000000..243a4009976e4 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/dummy_visibility_buffer_resolve.wgsl @@ -0,0 +1,4 @@ +#define_import_path bevy_pbr::meshlet_visibility_buffer_resolve + +/// Dummy shader to prevent naga_oil from complaining about missing imports when the MeshletPlugin is not loaded, +/// as naga_oil tries to resolve imports even if they're behind an #ifdef. diff --git a/crates/bevy_pbr/src/meshlet/from_mesh.rs b/crates/bevy_pbr/src/meshlet/from_mesh.rs new file mode 100644 index 0000000000000..c794c11c23885 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/from_mesh.rs @@ -0,0 +1,98 @@ +use super::asset::{Meshlet, MeshletBoundingSphere, MeshletMesh}; +use bevy_render::{ + mesh::{Indices, Mesh}, + render_resource::PrimitiveTopology, +}; +use meshopt::{build_meshlets, compute_meshlet_bounds_decoder, VertexDataAdapter}; +use std::borrow::Cow; + +impl MeshletMesh { + /// Process a [`Mesh`] to generate a [`MeshletMesh`]. + /// + /// This process is very slow, and should be done ahead of time, and not at runtime. + /// + /// This function requires the `meshlet_processor` cargo feature. + /// + /// The input mesh must: + /// 1. Use [`PrimitiveTopology::TriangleList`] + /// 2. Use indices + /// 3. Have the exact following set of vertex attributes: `{POSITION, NORMAL, UV_0, TANGENT}` + pub fn from_mesh(mesh: &Mesh) -> Result { + // Validate mesh format + if mesh.primitive_topology() != PrimitiveTopology::TriangleList { + return Err(MeshToMeshletMeshConversionError::WrongMeshPrimitiveTopology); + } + if mesh.attributes().map(|(id, _)| id).ne([ + Mesh::ATTRIBUTE_POSITION.id, + Mesh::ATTRIBUTE_NORMAL.id, + Mesh::ATTRIBUTE_UV_0.id, + Mesh::ATTRIBUTE_TANGENT.id, + ]) { + return Err(MeshToMeshletMeshConversionError::WrongMeshVertexAttributes); + } + let indices = match mesh.indices() { + Some(Indices::U32(indices)) => Cow::Borrowed(indices.as_slice()), + Some(Indices::U16(indices)) => indices.iter().map(|i| *i as u32).collect(), + _ => return Err(MeshToMeshletMeshConversionError::MeshMissingIndices), + }; + let vertex_buffer = mesh.get_vertex_buffer_data(); + let vertices = + VertexDataAdapter::new(&vertex_buffer, mesh.get_vertex_size() as usize, 0).unwrap(); + + // Split the mesh into meshlets + let meshopt_meshlets = build_meshlets(&indices, &vertices, 64, 64, 0.0); + + // Calculate meshlet bounding spheres + let meshlet_bounding_spheres = meshopt_meshlets + .iter() + .map(|meshlet| { + compute_meshlet_bounds_decoder( + meshlet, + mesh.attribute(Mesh::ATTRIBUTE_POSITION) + .unwrap() + .as_float3() + .unwrap(), + ) + }) + .map(|bounds| MeshletBoundingSphere { + center: bounds.center.into(), + radius: bounds.radius, + }) + .collect(); + + // Assemble into the final asset + let mut total_meshlet_triangles = 0; + let meshlets = meshopt_meshlets + .meshlets + .into_iter() + .map(|m| { + total_meshlet_triangles += m.triangle_count as u64; + Meshlet { + start_vertex_id: m.vertex_offset, + start_index_id: m.triangle_offset, + triangle_count: m.triangle_count, + } + }) + .collect(); + + Ok(Self { + total_meshlet_triangles, + vertex_data: vertex_buffer.into(), + vertex_ids: meshopt_meshlets.vertices.into(), + indices: meshopt_meshlets.triangles.into(), + meshlets, + meshlet_bounding_spheres, + }) + } +} + +/// An error produced by [`MeshletMesh::from_mesh`]. +#[derive(thiserror::Error, Debug)] +pub enum MeshToMeshletMeshConversionError { + #[error("Mesh primitive topology was not TriangleList")] + WrongMeshPrimitiveTopology, + #[error("Mesh attributes were not {{POSITION, NORMAL, UV_0, TANGENT}}")] + WrongMeshVertexAttributes, + #[error("Mesh had no indices")] + MeshMissingIndices, +} diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs new file mode 100644 index 0000000000000..492179dc2bcfe --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -0,0 +1,977 @@ +use super::{persistent_buffer::PersistentGpuBuffer, Meshlet, MeshletBoundingSphere, MeshletMesh}; +use crate::{ + Material, MeshFlags, MeshTransforms, MeshUniform, NotShadowCaster, NotShadowReceiver, + PreviousGlobalTransform, RenderMaterialInstances, ShadowView, +}; +use bevy_asset::{AssetEvent, AssetId, AssetServer, Assets, Handle, UntypedAssetId}; +use bevy_core_pipeline::core_3d::Camera3d; +use bevy_ecs::{ + component::Component, + entity::{Entity, EntityHashMap}, + event::EventReader, + query::{AnyOf, Has}, + system::{Commands, Query, Res, ResMut, Resource, SystemState}, + world::{FromWorld, World}, +}; +use bevy_render::{ + render_resource::{binding_types::*, *}, + renderer::{RenderDevice, RenderQueue}, + texture::{CachedTexture, TextureCache}, + view::{ExtractedView, RenderLayers, ViewDepthTexture, ViewUniform, ViewUniforms}, + MainWorld, +}; +use bevy_transform::components::GlobalTransform; +use bevy_utils::{default, HashMap, HashSet}; +use encase::internal::WriteInto; +use std::{ + iter, + mem::size_of, + ops::{DerefMut, Range}, + sync::Arc, +}; + +/// Create and queue for uploading to the GPU [`MeshUniform`] components for +/// [`MeshletMesh`] entities, as well as queuing uploads for any new meshlet mesh +/// assets that have not already been uploaded to the GPU. +pub fn extract_meshlet_meshes( + // TODO: Replace main_world when Extract>> is possible + mut main_world: ResMut, + mut gpu_scene: ResMut, +) { + let mut system_state: SystemState<( + Query<( + Entity, + &Handle, + &GlobalTransform, + Option<&PreviousGlobalTransform>, + Option<&RenderLayers>, + Has, + Has, + )>, + Res, + ResMut>, + EventReader>, + )> = SystemState::new(&mut main_world); + let (instances_query, asset_server, mut assets, mut asset_events) = + system_state.get_mut(&mut main_world); + + // Reset all temporary data for MeshletGpuScene + gpu_scene.reset(); + + // Free GPU buffer space for any modified or dropped MeshletMesh assets + for asset_event in asset_events.read() { + if let AssetEvent::Unused { id } | AssetEvent::Modified { id } = asset_event { + if let Some(( + [vertex_data_slice, vertex_ids_slice, indices_slice, meshlets_slice, meshlet_bounding_spheres_slice], + _, + )) = gpu_scene.meshlet_mesh_slices.remove(id) + { + gpu_scene.vertex_data.mark_slice_unused(vertex_data_slice); + gpu_scene.vertex_ids.mark_slice_unused(vertex_ids_slice); + gpu_scene.indices.mark_slice_unused(indices_slice); + gpu_scene.meshlets.mark_slice_unused(meshlets_slice); + gpu_scene + .meshlet_bounding_spheres + .mark_slice_unused(meshlet_bounding_spheres_slice); + } + } + } + + for ( + instance_index, + ( + instance, + handle, + transform, + previous_transform, + render_layers, + not_shadow_receiver, + not_shadow_caster, + ), + ) in instances_query.iter().enumerate() + { + // Skip instances with an unloaded MeshletMesh asset + if asset_server.is_managed(handle.id()) + && !asset_server.is_loaded_with_dependencies(handle.id()) + { + continue; + } + + // Upload the instance's MeshletMesh asset data, if not done already, along with other per-frame per-instance data. + gpu_scene.queue_meshlet_mesh_upload( + instance, + render_layers.cloned().unwrap_or(default()), + not_shadow_caster, + handle, + &mut assets, + instance_index as u32, + ); + + // Build a MeshUniform for each instance + let transform = transform.affine(); + let previous_transform = previous_transform.map(|t| t.0).unwrap_or(transform); + let mut flags = if not_shadow_receiver { + MeshFlags::empty() + } else { + MeshFlags::SHADOW_RECEIVER + }; + if transform.matrix3.determinant().is_sign_positive() { + flags |= MeshFlags::SIGN_DETERMINANT_MODEL_3X3; + } + let transforms = MeshTransforms { + transform: (&transform).into(), + previous_transform: (&previous_transform).into(), + flags: flags.bits(), + }; + gpu_scene + .instance_uniforms + .get_mut() + .push(MeshUniform::new(&transforms, None)); + } +} + +/// Upload all newly queued [`MeshletMesh`] asset data from [`extract_meshlet_meshes`] to the GPU. +pub fn perform_pending_meshlet_mesh_writes( + mut gpu_scene: ResMut, + render_queue: Res, + render_device: Res, +) { + gpu_scene + .vertex_data + .perform_writes(&render_queue, &render_device); + gpu_scene + .vertex_ids + .perform_writes(&render_queue, &render_device); + gpu_scene + .indices + .perform_writes(&render_queue, &render_device); + gpu_scene + .meshlets + .perform_writes(&render_queue, &render_device); + gpu_scene + .meshlet_bounding_spheres + .perform_writes(&render_queue, &render_device); +} + +/// For each entity in the scene, record what material ID (for use with depth testing during the meshlet mesh material draw nodes) +/// its material was assigned in the `prepare_material_meshlet_meshes` systems, and note that the material is used by at least one entity in the scene. +pub fn queue_material_meshlet_meshes( + mut gpu_scene: ResMut, + render_material_instances: Res>, +) { + // TODO: Ideally we could parallelize this system, both between different materials, and the loop over instances + let gpu_scene = gpu_scene.deref_mut(); + + for (i, (instance, _, _)) in gpu_scene.instances.iter().enumerate() { + if let Some(material_asset_id) = render_material_instances.get(instance) { + let material_asset_id = material_asset_id.untyped(); + if let Some(material_id) = gpu_scene.material_id_lookup.get(&material_asset_id) { + gpu_scene.material_ids_present_in_scene.insert(*material_id); + gpu_scene.instance_material_ids.get_mut()[i] = *material_id; + } + } + } +} + +// TODO: Try using Queue::write_buffer_with() in queue_meshlet_mesh_upload() to reduce copies +fn upload_storage_buffer( + buffer: &mut StorageBuffer>, + render_device: &RenderDevice, + render_queue: &RenderQueue, +) where + Vec: WriteInto, +{ + let inner = buffer.buffer(); + let capacity = inner.map_or(0, |b| b.size()); + let size = buffer.get().size().get() as BufferAddress; + + if capacity >= size { + let inner = inner.unwrap(); + let bytes = bytemuck::cast_slice(buffer.get().as_slice()); + render_queue.write_buffer(inner, 0, bytes); + } else { + buffer.write_buffer(render_device, render_queue); + } +} + +pub fn prepare_meshlet_per_frame_resources( + mut gpu_scene: ResMut, + views: Query<( + Entity, + &ExtractedView, + Option<&RenderLayers>, + AnyOf<(&Camera3d, &ShadowView)>, + )>, + mut texture_cache: ResMut, + render_queue: Res, + render_device: Res, + mut commands: Commands, +) { + gpu_scene + .previous_cluster_id_starts + .retain(|_, (_, active)| *active); + + if gpu_scene.scene_meshlet_count == 0 { + return; + } + + let gpu_scene = gpu_scene.as_mut(); + + gpu_scene + .instance_uniforms + .write_buffer(&render_device, &render_queue); + upload_storage_buffer( + &mut gpu_scene.instance_material_ids, + &render_device, + &render_queue, + ); + upload_storage_buffer( + &mut gpu_scene.thread_instance_ids, + &render_device, + &render_queue, + ); + upload_storage_buffer( + &mut gpu_scene.thread_meshlet_ids, + &render_device, + &render_queue, + ); + upload_storage_buffer( + &mut gpu_scene.previous_cluster_ids, + &render_device, + &render_queue, + ); + + let needed_buffer_size = 4 * gpu_scene.scene_triangle_count; + let visibility_buffer_draw_index_buffer = + match &mut gpu_scene.visibility_buffer_draw_index_buffer { + Some(buffer) if buffer.size() >= needed_buffer_size => buffer.clone(), + slot => { + let buffer = render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_visibility_buffer_draw_index_buffer"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE | BufferUsages::INDEX, + mapped_at_creation: false, + }); + *slot = Some(buffer.clone()); + buffer + } + }; + + let needed_buffer_size = gpu_scene.scene_meshlet_count.div_ceil(32) as u64 * 4; + for (view_entity, view, render_layers, (_, shadow_view)) in &views { + let instance_visibility = gpu_scene + .view_instance_visibility + .entry(view_entity) + .or_insert_with(|| { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_view_instance_visibility")); + buffer + }); + for (instance_index, (_, layers, not_shadow_caster)) in + gpu_scene.instances.iter().enumerate() + { + // If either the layers don't match the view's layers or this is a shadow view + // and the instance is not a shadow caster, hide the instance for this view + if !render_layers.unwrap_or(&default()).intersects(layers) + || (shadow_view.is_some() && *not_shadow_caster) + { + let vec = instance_visibility.get_mut(); + let index = instance_index / 32; + let bit = instance_index - index * 32; + if vec.len() <= index { + vec.extend(iter::repeat(0).take(index - vec.len() + 1)); + } + vec[index] |= 1 << bit; + } + } + upload_storage_buffer(instance_visibility, &render_device, &render_queue); + let instance_visibility = instance_visibility.buffer().unwrap().clone(); + + // Early submission for GPU data uploads to start while the render graph records commands + render_queue.submit([]); + + let create_occlusion_buffer = || { + render_device.create_buffer(&BufferDescriptor { + label: Some("meshlet_occlusion_buffer"), + size: needed_buffer_size, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST, + mapped_at_creation: false, + }) + }; + let (previous_occlusion_buffer, occlusion_buffer, occlusion_buffer_needs_clearing) = + match gpu_scene.previous_occlusion_buffers.get(&view_entity) { + Some((buffer_a, buffer_b)) if buffer_b.size() >= needed_buffer_size => { + (buffer_a.clone(), buffer_b.clone(), true) + } + Some((buffer_a, _)) => (buffer_a.clone(), create_occlusion_buffer(), false), + None => (create_occlusion_buffer(), create_occlusion_buffer(), false), + }; + gpu_scene.previous_occlusion_buffers.insert( + view_entity, + (occlusion_buffer.clone(), previous_occlusion_buffer.clone()), + ); + + let visibility_buffer = TextureDescriptor { + label: Some("meshlet_visibility_buffer"), + size: Extent3d { + width: view.viewport.z, + height: view.viewport.w, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Uint, + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }; + + let visibility_buffer_draw_indirect_args_first = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_draw_indirect_args_first"), + contents: DrawIndirectArgs { + vertex_count: 0, + instance_count: 1, + first_vertex: 0, + first_instance: 0, + } + .as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + let visibility_buffer_draw_indirect_args_second = + render_device.create_buffer_with_data(&BufferInitDescriptor { + label: Some("meshlet_visibility_buffer_draw_indirect_args_second"), + contents: DrawIndirectArgs { + vertex_count: 0, + instance_count: 1, + first_vertex: 0, + first_instance: 0, + } + .as_bytes(), + usage: BufferUsages::STORAGE | BufferUsages::INDIRECT, + }); + + let depth_size = Extent3d { + // If not a power of 2, round down to the nearest power of 2 to ensure depth is conservative + width: previous_power_of_2(view.viewport.z), + height: previous_power_of_2(view.viewport.w), + depth_or_array_layers: 1, + }; + let depth_mip_count = depth_size.width.max(depth_size.height).ilog2() + 1; + let depth_pyramid = texture_cache.get( + &render_device, + TextureDescriptor { + label: Some("meshlet_depth_pyramid"), + size: depth_size, + mip_level_count: depth_mip_count, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R32Float, + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }, + ); + let depth_pyramid_mips = (0..depth_mip_count) + .map(|i| { + depth_pyramid.texture.create_view(&TextureViewDescriptor { + label: Some("meshlet_depth_pyramid_texture_view"), + format: Some(TextureFormat::R32Float), + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + base_mip_level: i, + mip_level_count: Some(1), + base_array_layer: 0, + array_layer_count: None, + }) + }) + .collect(); + + let material_depth_color = TextureDescriptor { + label: Some("meshlet_material_depth_color"), + size: Extent3d { + width: view.viewport.z, + height: view.viewport.w, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::R16Uint, + usage: TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }; + + let material_depth = TextureDescriptor { + label: Some("meshlet_material_depth"), + size: Extent3d { + width: view.viewport.z, + height: view.viewport.w, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: TextureDimension::D2, + format: TextureFormat::Depth16Unorm, + usage: TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }; + + let not_shadow_view = shadow_view.is_none(); + commands.entity(view_entity).insert(MeshletViewResources { + scene_meshlet_count: gpu_scene.scene_meshlet_count, + previous_occlusion_buffer, + occlusion_buffer, + occlusion_buffer_needs_clearing, + instance_visibility, + visibility_buffer: not_shadow_view + .then(|| texture_cache.get(&render_device, visibility_buffer)), + visibility_buffer_draw_indirect_args_first, + visibility_buffer_draw_indirect_args_second, + visibility_buffer_draw_index_buffer: visibility_buffer_draw_index_buffer.clone(), + depth_pyramid, + depth_pyramid_mips, + material_depth_color: not_shadow_view + .then(|| texture_cache.get(&render_device, material_depth_color)), + material_depth: not_shadow_view + .then(|| texture_cache.get(&render_device, material_depth)), + }); + } +} + +pub fn prepare_meshlet_view_bind_groups( + gpu_scene: Res, + views: Query<( + Entity, + &MeshletViewResources, + AnyOf<(&ViewDepthTexture, &ShadowView)>, + )>, + view_uniforms: Res, + render_device: Res, + mut commands: Commands, +) { + let Some(view_uniforms) = view_uniforms.uniforms.binding() else { + return; + }; + + for (view_entity, view_resources, view_depth) in &views { + let entries = BindGroupEntries::sequential(( + gpu_scene.thread_meshlet_ids.binding().unwrap(), + gpu_scene.meshlet_bounding_spheres.binding(), + gpu_scene.thread_instance_ids.binding().unwrap(), + gpu_scene.instance_uniforms.binding().unwrap(), + gpu_scene.view_instance_visibility[&view_entity] + .binding() + .unwrap(), + view_resources.occlusion_buffer.as_entire_binding(), + gpu_scene.previous_cluster_ids.binding().unwrap(), + view_resources.previous_occlusion_buffer.as_entire_binding(), + view_uniforms.clone(), + &view_resources.depth_pyramid.default_view, + )); + let culling = render_device.create_bind_group( + "meshlet_culling_bind_group", + &gpu_scene.culling_bind_group_layout, + &entries, + ); + + let entries = BindGroupEntries::sequential(( + view_resources.occlusion_buffer.as_entire_binding(), + gpu_scene.thread_meshlet_ids.binding().unwrap(), + gpu_scene.previous_cluster_ids.binding().unwrap(), + view_resources.previous_occlusion_buffer.as_entire_binding(), + gpu_scene.meshlets.binding(), + view_resources + .visibility_buffer_draw_indirect_args_first + .as_entire_binding(), + view_resources + .visibility_buffer_draw_index_buffer + .as_entire_binding(), + )); + let write_index_buffer_first = render_device.create_bind_group( + "meshlet_write_index_buffer_first_bind_group", + &gpu_scene.write_index_buffer_bind_group_layout, + &entries, + ); + + let entries = BindGroupEntries::sequential(( + view_resources.occlusion_buffer.as_entire_binding(), + gpu_scene.thread_meshlet_ids.binding().unwrap(), + gpu_scene.previous_cluster_ids.binding().unwrap(), + view_resources.previous_occlusion_buffer.as_entire_binding(), + gpu_scene.meshlets.binding(), + view_resources + .visibility_buffer_draw_indirect_args_second + .as_entire_binding(), + view_resources + .visibility_buffer_draw_index_buffer + .as_entire_binding(), + )); + let write_index_buffer_second = render_device.create_bind_group( + "meshlet_write_index_buffer_second_bind_group", + &gpu_scene.write_index_buffer_bind_group_layout, + &entries, + ); + + let view_depth_texture = match view_depth { + (Some(view_depth), None) => view_depth.view(), + (None, Some(shadow_view)) => &shadow_view.depth_attachment.view, + _ => unreachable!(), + }; + let downsample_depth = (0..view_resources.depth_pyramid_mips.len()) + .map(|i| { + render_device.create_bind_group( + "meshlet_downsample_depth_bind_group", + &gpu_scene.downsample_depth_bind_group_layout, + &BindGroupEntries::sequential(( + if i == 0 { + view_depth_texture + } else { + &view_resources.depth_pyramid_mips[i - 1] + }, + &gpu_scene.depth_pyramid_sampler, + )), + ) + }) + .collect(); + + let entries = BindGroupEntries::sequential(( + gpu_scene.thread_meshlet_ids.binding().unwrap(), + gpu_scene.meshlets.binding(), + gpu_scene.indices.binding(), + gpu_scene.vertex_ids.binding(), + gpu_scene.vertex_data.binding(), + gpu_scene.thread_instance_ids.binding().unwrap(), + gpu_scene.instance_uniforms.binding().unwrap(), + gpu_scene.instance_material_ids.binding().unwrap(), + view_resources + .visibility_buffer_draw_index_buffer + .as_entire_binding(), + view_uniforms.clone(), + )); + let visibility_buffer_raster = render_device.create_bind_group( + "meshlet_visibility_raster_buffer_bind_group", + &gpu_scene.visibility_buffer_raster_bind_group_layout, + &entries, + ); + + let copy_material_depth = + view_resources + .material_depth_color + .as_ref() + .map(|material_depth_color| { + render_device.create_bind_group( + "meshlet_copy_material_depth_bind_group", + &gpu_scene.copy_material_depth_bind_group_layout, + &[BindGroupEntry { + binding: 0, + resource: BindingResource::TextureView( + &material_depth_color.default_view, + ), + }], + ) + }); + + let material_draw = view_resources + .visibility_buffer + .as_ref() + .map(|visibility_buffer| { + let entries = BindGroupEntries::sequential(( + &visibility_buffer.default_view, + gpu_scene.thread_meshlet_ids.binding().unwrap(), + gpu_scene.meshlets.binding(), + gpu_scene.indices.binding(), + gpu_scene.vertex_ids.binding(), + gpu_scene.vertex_data.binding(), + gpu_scene.thread_instance_ids.binding().unwrap(), + gpu_scene.instance_uniforms.binding().unwrap(), + )); + render_device.create_bind_group( + "meshlet_mesh_material_draw_bind_group", + &gpu_scene.material_draw_bind_group_layout, + &entries, + ) + }); + + commands.entity(view_entity).insert(MeshletViewBindGroups { + culling, + write_index_buffer_first, + write_index_buffer_second, + downsample_depth, + visibility_buffer_raster, + copy_material_depth, + material_draw, + }); + } +} + +/// A resource that manages GPU data for rendering [`MeshletMesh`]'s. +#[derive(Resource)] +pub struct MeshletGpuScene { + vertex_data: PersistentGpuBuffer>, + vertex_ids: PersistentGpuBuffer>, + indices: PersistentGpuBuffer>, + meshlets: PersistentGpuBuffer>, + meshlet_bounding_spheres: PersistentGpuBuffer>, + meshlet_mesh_slices: HashMap, ([Range; 5], u64)>, + + scene_meshlet_count: u32, + scene_triangle_count: u64, + next_material_id: u32, + material_id_lookup: HashMap, + material_ids_present_in_scene: HashSet, + /// Per-instance Entity, RenderLayers, and NotShadowCaster + instances: Vec<(Entity, RenderLayers, bool)>, + /// Per-instance transforms, model matrices, and render flags + instance_uniforms: StorageBuffer>, + /// Per-view per-instance visibility bit. Used for RenderLayer and NotShadowCaster support. + view_instance_visibility: EntityHashMap>>, + instance_material_ids: StorageBuffer>, + thread_instance_ids: StorageBuffer>, + thread_meshlet_ids: StorageBuffer>, + previous_cluster_ids: StorageBuffer>, + previous_cluster_id_starts: HashMap<(Entity, AssetId), (u32, bool)>, + previous_occlusion_buffers: EntityHashMap<(Buffer, Buffer)>, + visibility_buffer_draw_index_buffer: Option, + + culling_bind_group_layout: BindGroupLayout, + write_index_buffer_bind_group_layout: BindGroupLayout, + visibility_buffer_raster_bind_group_layout: BindGroupLayout, + downsample_depth_bind_group_layout: BindGroupLayout, + copy_material_depth_bind_group_layout: BindGroupLayout, + material_draw_bind_group_layout: BindGroupLayout, + depth_pyramid_sampler: Sampler, +} + +impl FromWorld for MeshletGpuScene { + fn from_world(world: &mut World) -> Self { + let render_device = world.resource::(); + + Self { + vertex_data: PersistentGpuBuffer::new("meshlet_vertex_data", render_device), + vertex_ids: PersistentGpuBuffer::new("meshlet_vertex_ids", render_device), + indices: PersistentGpuBuffer::new("meshlet_indices", render_device), + meshlets: PersistentGpuBuffer::new("meshlets", render_device), + meshlet_bounding_spheres: PersistentGpuBuffer::new( + "meshlet_bounding_spheres", + render_device, + ), + meshlet_mesh_slices: HashMap::new(), + + scene_meshlet_count: 0, + scene_triangle_count: 0, + next_material_id: 0, + material_id_lookup: HashMap::new(), + material_ids_present_in_scene: HashSet::new(), + instances: Vec::new(), + instance_uniforms: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_uniforms")); + buffer + }, + view_instance_visibility: EntityHashMap::default(), + instance_material_ids: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_instance_material_ids")); + buffer + }, + thread_instance_ids: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_thread_instance_ids")); + buffer + }, + thread_meshlet_ids: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_thread_meshlet_ids")); + buffer + }, + previous_cluster_ids: { + let mut buffer = StorageBuffer::default(); + buffer.set_label(Some("meshlet_previous_cluster_ids")); + buffer + }, + previous_cluster_id_starts: HashMap::new(), + previous_occlusion_buffers: EntityHashMap::default(), + visibility_buffer_draw_index_buffer: None, + + // TODO: Buffer min sizes + culling_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_culling_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + uniform_buffer::(true), + texture_2d(TextureSampleType::Float { filterable: false }), + ), + ), + ), + write_index_buffer_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_write_index_buffer_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::COMPUTE, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_sized(false, None), + storage_buffer_sized(false, None), + ), + ), + ), + downsample_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_downsample_depth_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + texture_2d(TextureSampleType::Float { filterable: false }), + sampler(SamplerBindingType::NonFiltering), + ), + ), + ), + visibility_buffer_raster_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_visibility_buffer_raster_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::VERTEX, + ( + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + uniform_buffer::(true), + ), + ), + ), + copy_material_depth_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_copy_material_depth_bind_group_layout", + &BindGroupLayoutEntries::single( + ShaderStages::FRAGMENT, + texture_2d(TextureSampleType::Uint), + ), + ), + material_draw_bind_group_layout: render_device.create_bind_group_layout( + "meshlet_mesh_material_draw_bind_group_layout", + &BindGroupLayoutEntries::sequential( + ShaderStages::FRAGMENT, + ( + texture_2d(TextureSampleType::Uint), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + storage_buffer_read_only_sized(false, None), + ), + ), + ), + depth_pyramid_sampler: render_device.create_sampler(&SamplerDescriptor { + label: Some("meshlet_depth_pyramid_sampler"), + ..default() + }), + } + } +} + +impl MeshletGpuScene { + /// Clear per-frame CPU->GPU upload buffers and reset all per-frame data. + fn reset(&mut self) { + // TODO: Shrink capacity if saturation is low + self.scene_meshlet_count = 0; + self.scene_triangle_count = 0; + self.next_material_id = 0; + self.material_id_lookup.clear(); + self.material_ids_present_in_scene.clear(); + self.instances.clear(); + self.view_instance_visibility + .values_mut() + .for_each(|b| b.get_mut().clear()); + self.instance_uniforms.get_mut().clear(); + self.instance_material_ids.get_mut().clear(); + self.thread_instance_ids.get_mut().clear(); + self.thread_meshlet_ids.get_mut().clear(); + self.previous_cluster_ids.get_mut().clear(); + self.previous_cluster_id_starts + .values_mut() + .for_each(|(_, active)| *active = false); + // TODO: Remove unused entries for previous_occlusion_buffers + } + + fn queue_meshlet_mesh_upload( + &mut self, + instance: Entity, + render_layers: RenderLayers, + not_shadow_caster: bool, + handle: &Handle, + assets: &mut Assets, + instance_index: u32, + ) { + let queue_meshlet_mesh = |asset_id: &AssetId| { + let meshlet_mesh = assets.remove_untracked(*asset_id).expect( + "MeshletMesh asset was already unloaded but is not registered with MeshletGpuScene", + ); + + let vertex_data_slice = self + .vertex_data + .queue_write(Arc::clone(&meshlet_mesh.vertex_data), ()); + let vertex_ids_slice = self.vertex_ids.queue_write( + Arc::clone(&meshlet_mesh.vertex_ids), + vertex_data_slice.start, + ); + let indices_slice = self + .indices + .queue_write(Arc::clone(&meshlet_mesh.indices), ()); + let meshlets_slice = self.meshlets.queue_write( + Arc::clone(&meshlet_mesh.meshlets), + (vertex_ids_slice.start, indices_slice.start), + ); + let meshlet_bounding_spheres_slice = self + .meshlet_bounding_spheres + .queue_write(Arc::clone(&meshlet_mesh.meshlet_bounding_spheres), ()); + + ( + [ + vertex_data_slice, + vertex_ids_slice, + indices_slice, + meshlets_slice, + meshlet_bounding_spheres_slice, + ], + meshlet_mesh.total_meshlet_triangles, + ) + }; + + // Append instance data for this frame + self.instances + .push((instance, render_layers, not_shadow_caster)); + self.instance_material_ids.get_mut().push(0); + + // If the MeshletMesh asset has not been uploaded to the GPU yet, queue it for uploading + let ([_, _, _, meshlets_slice, _], triangle_count) = self + .meshlet_mesh_slices + .entry(handle.id()) + .or_insert_with_key(queue_meshlet_mesh) + .clone(); + + let meshlets_slice = (meshlets_slice.start as u32 / size_of::() as u32) + ..(meshlets_slice.end as u32 / size_of::() as u32); + + let current_cluster_id_start = self.scene_meshlet_count; + + self.scene_meshlet_count += meshlets_slice.end - meshlets_slice.start; + self.scene_triangle_count += triangle_count; + + // Calculate the previous cluster IDs for each meshlet for this instance + let previous_cluster_id_start = self + .previous_cluster_id_starts + .entry((instance, handle.id())) + .or_insert((0, true)); + let previous_cluster_ids = if previous_cluster_id_start.1 { + 0..(meshlets_slice.len() as u32) + } else { + let start = previous_cluster_id_start.0; + start..(meshlets_slice.len() as u32 + start) + }; + + // Append per-cluster data for this frame + self.thread_instance_ids + .get_mut() + .extend(std::iter::repeat(instance_index).take(meshlets_slice.len())); + self.thread_meshlet_ids.get_mut().extend(meshlets_slice); + self.previous_cluster_ids + .get_mut() + .extend(previous_cluster_ids); + + *previous_cluster_id_start = (current_cluster_id_start, true); + } + + /// Get the depth value for use with the material depth texture for a given [`Material`] asset. + pub fn get_material_id(&mut self, material_id: UntypedAssetId) -> u32 { + *self + .material_id_lookup + .entry(material_id) + .or_insert_with(|| { + self.next_material_id += 1; + self.next_material_id + }) + } + + pub fn material_present_in_scene(&self, material_id: &u32) -> bool { + self.material_ids_present_in_scene.contains(material_id) + } + + pub fn culling_bind_group_layout(&self) -> BindGroupLayout { + self.culling_bind_group_layout.clone() + } + + pub fn write_index_buffer_bind_group_layout(&self) -> BindGroupLayout { + self.write_index_buffer_bind_group_layout.clone() + } + + pub fn downsample_depth_bind_group_layout(&self) -> BindGroupLayout { + self.downsample_depth_bind_group_layout.clone() + } + + pub fn visibility_buffer_raster_bind_group_layout(&self) -> BindGroupLayout { + self.visibility_buffer_raster_bind_group_layout.clone() + } + + pub fn copy_material_depth_bind_group_layout(&self) -> BindGroupLayout { + self.copy_material_depth_bind_group_layout.clone() + } + + pub fn material_draw_bind_group_layout(&self) -> BindGroupLayout { + self.material_draw_bind_group_layout.clone() + } +} + +#[derive(Component)] +pub struct MeshletViewResources { + pub scene_meshlet_count: u32, + previous_occlusion_buffer: Buffer, + pub occlusion_buffer: Buffer, + pub occlusion_buffer_needs_clearing: bool, + pub instance_visibility: Buffer, + pub visibility_buffer: Option, + pub visibility_buffer_draw_indirect_args_first: Buffer, + pub visibility_buffer_draw_indirect_args_second: Buffer, + visibility_buffer_draw_index_buffer: Buffer, + pub depth_pyramid: CachedTexture, + pub depth_pyramid_mips: Box<[TextureView]>, + pub material_depth_color: Option, + pub material_depth: Option, +} + +#[derive(Component)] +pub struct MeshletViewBindGroups { + pub culling: BindGroup, + pub write_index_buffer_first: BindGroup, + pub write_index_buffer_second: BindGroup, + pub downsample_depth: Box<[BindGroup]>, + pub visibility_buffer_raster: BindGroup, + pub copy_material_depth: Option, + pub material_draw: Option, +} + +fn previous_power_of_2(x: u32) -> u32 { + // If x is a power of 2, halve it + if x.count_ones() == 1 { + x / 2 + } else { + // Else calculate the largest power of 2 that is less than x + 1 << (31 - x.leading_zeros()) + } +} diff --git a/crates/bevy_pbr/src/meshlet/material_draw_nodes.rs b/crates/bevy_pbr/src/meshlet/material_draw_nodes.rs new file mode 100644 index 0000000000000..f87751ba4b9c1 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/material_draw_nodes.rs @@ -0,0 +1,379 @@ +use super::{ + gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, + material_draw_prepare::{ + MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, + MeshletViewMaterialsPrepass, + }, + MeshletGpuScene, +}; +use crate::{ + MeshViewBindGroup, PrepassViewBindGroup, PreviousViewProjectionUniformOffset, + ViewFogUniformOffset, ViewLightProbesUniformOffset, ViewLightsUniformOffset, +}; +use bevy_core_pipeline::prepass::ViewPrepassTextures; +use bevy_ecs::{query::QueryItem, world::World}; +use bevy_render::{ + camera::ExtractedCamera, + render_graph::{NodeRunError, RenderGraphContext, ViewNode}, + render_resource::{ + LoadOp, Operations, PipelineCache, RenderPassDepthStencilAttachment, RenderPassDescriptor, + StoreOp, + }, + renderer::RenderContext, + view::{ViewTarget, ViewUniformOffset}, +}; + +/// Fullscreen shading pass based on the visibility buffer generated from rasterizing meshlets. +#[derive(Default)] +pub struct MeshletMainOpaquePass3dNode; +impl ViewNode for MeshletMainOpaquePass3dNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ViewTarget, + &'static MeshViewBindGroup, + &'static ViewUniformOffset, + &'static ViewLightsUniformOffset, + &'static ViewFogUniformOffset, + &'static ViewLightProbesUniformOffset, + &'static MeshletViewMaterialsMainOpaquePass, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + ( + camera, + target, + mesh_view_bind_group, + view_uniform_offset, + view_lights_offset, + view_fog_offset, + view_light_probes_offset, + meshlet_view_materials, + meshlet_view_bind_groups, + meshlet_view_resources, + ): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + if meshlet_view_materials.is_empty() { + return Ok(()); + } + + let ( + Some(meshlet_gpu_scene), + Some(pipeline_cache), + Some(meshlet_material_depth), + Some(meshlet_material_draw_bind_group), + ) = ( + world.get_resource::(), + world.get_resource::(), + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.material_draw.as_ref(), + ) + else { + return Ok(()); + }; + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("meshlet_main_opaque_pass_3d"), + color_attachments: &[Some(target.get_color_attachment())], + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &meshlet_material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Load, + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + render_pass.set_bind_group( + 0, + &mesh_view_bind_group.value, + &[ + view_uniform_offset.offset, + view_lights_offset.offset, + view_fog_offset.offset, + **view_light_probes_offset, + ], + ); + render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + + // 1 fullscreen triangle draw per material + for (material_id, material_pipeline_id, material_bind_group) in + meshlet_view_materials.iter() + { + if meshlet_gpu_scene.material_present_in_scene(material_id) { + if let Some(material_pipeline) = + pipeline_cache.get_render_pipeline(*material_pipeline_id) + { + let x = *material_id * 3; + render_pass.set_bind_group(2, material_bind_group, &[]); + render_pass.set_render_pipeline(material_pipeline); + render_pass.draw(x..(x + 3), 0..1); + } + } + } + + Ok(()) + } +} + +/// Fullscreen pass to generate prepass textures based on the visibility buffer generated from rasterizing meshlets. +#[derive(Default)] +pub struct MeshletPrepassNode; +impl ViewNode for MeshletPrepassNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ViewPrepassTextures, + &'static ViewUniformOffset, + Option<&'static PreviousViewProjectionUniformOffset>, + &'static MeshletViewMaterialsPrepass, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + ( + camera, + view_prepass_textures, + view_uniform_offset, + previous_view_projection_uniform_offset, + meshlet_view_materials, + meshlet_view_bind_groups, + meshlet_view_resources, + ): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + if meshlet_view_materials.is_empty() { + return Ok(()); + } + + let ( + Some(prepass_view_bind_group), + Some(meshlet_gpu_scene), + Some(pipeline_cache), + Some(meshlet_material_depth), + Some(meshlet_material_draw_bind_group), + ) = ( + world.get_resource::(), + world.get_resource::(), + world.get_resource::(), + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.material_draw.as_ref(), + ) + else { + return Ok(()); + }; + + let color_attachments = vec![ + view_prepass_textures + .normal + .as_ref() + .map(|normals_texture| normals_texture.get_attachment()), + view_prepass_textures + .motion_vectors + .as_ref() + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + // Use None in place of Deferred attachments + None, + None, + ]; + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("meshlet_prepass"), + color_attachments: &color_attachments, + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &meshlet_material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Load, + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + if let Some(previous_view_projection_uniform_offset) = + previous_view_projection_uniform_offset + { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.motion_vectors.as_ref().unwrap(), + &[ + view_uniform_offset.offset, + previous_view_projection_uniform_offset.offset, + ], + ); + } else { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), + &[view_uniform_offset.offset], + ); + } + + render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + + // 1 fullscreen triangle draw per material + for (material_id, material_pipeline_id, material_bind_group) in + meshlet_view_materials.iter() + { + if meshlet_gpu_scene.material_present_in_scene(material_id) { + if let Some(material_pipeline) = + pipeline_cache.get_render_pipeline(*material_pipeline_id) + { + let x = *material_id * 3; + render_pass.set_bind_group(2, material_bind_group, &[]); + render_pass.set_render_pipeline(material_pipeline); + render_pass.draw(x..(x + 3), 0..1); + } + } + } + + Ok(()) + } +} + +/// Fullscreen pass to generate a gbuffer based on the visibility buffer generated from rasterizing meshlets. +#[derive(Default)] +pub struct MeshletDeferredGBufferPrepassNode; +impl ViewNode for MeshletDeferredGBufferPrepassNode { + type ViewQuery = ( + &'static ExtractedCamera, + &'static ViewPrepassTextures, + &'static ViewUniformOffset, + Option<&'static PreviousViewProjectionUniformOffset>, + &'static MeshletViewMaterialsDeferredGBufferPrepass, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + ); + + fn run( + &self, + _graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + ( + camera, + view_prepass_textures, + view_uniform_offset, + previous_view_projection_uniform_offset, + meshlet_view_materials, + meshlet_view_bind_groups, + meshlet_view_resources, + ): QueryItem, + world: &World, + ) -> Result<(), NodeRunError> { + if meshlet_view_materials.is_empty() { + return Ok(()); + } + + let ( + Some(prepass_view_bind_group), + Some(meshlet_gpu_scene), + Some(pipeline_cache), + Some(meshlet_material_depth), + Some(meshlet_material_draw_bind_group), + ) = ( + world.get_resource::(), + world.get_resource::(), + world.get_resource::(), + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.material_draw.as_ref(), + ) + else { + return Ok(()); + }; + + let color_attachments = vec![ + view_prepass_textures + .normal + .as_ref() + .map(|normals_texture| normals_texture.get_attachment()), + view_prepass_textures + .motion_vectors + .as_ref() + .map(|motion_vectors_texture| motion_vectors_texture.get_attachment()), + view_prepass_textures + .deferred + .as_ref() + .map(|deferred_texture| deferred_texture.get_attachment()), + view_prepass_textures + .deferred_lighting_pass_id + .as_ref() + .map(|deferred_lighting_pass_id| deferred_lighting_pass_id.get_attachment()), + ]; + + let mut render_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("meshlet_deferred_prepass"), + color_attachments: &color_attachments, + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &meshlet_material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Load, + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = camera.viewport.as_ref() { + render_pass.set_camera_viewport(viewport); + } + + if let Some(previous_view_projection_uniform_offset) = + previous_view_projection_uniform_offset + { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.motion_vectors.as_ref().unwrap(), + &[ + view_uniform_offset.offset, + previous_view_projection_uniform_offset.offset, + ], + ); + } else { + render_pass.set_bind_group( + 0, + prepass_view_bind_group.no_motion_vectors.as_ref().unwrap(), + &[view_uniform_offset.offset], + ); + } + + render_pass.set_bind_group(1, meshlet_material_draw_bind_group, &[]); + + // 1 fullscreen triangle draw per material + for (material_id, material_pipeline_id, material_bind_group) in + meshlet_view_materials.iter() + { + if meshlet_gpu_scene.material_present_in_scene(material_id) { + if let Some(material_pipeline) = + pipeline_cache.get_render_pipeline(*material_pipeline_id) + { + let x = *material_id * 3; + render_pass.set_bind_group(2, material_bind_group, &[]); + render_pass.set_render_pipeline(material_pipeline); + render_pass.draw(x..(x + 3), 0..1); + } + } + } + + Ok(()) + } +} diff --git a/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs b/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs new file mode 100644 index 0000000000000..937651834c10c --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/material_draw_prepare.rs @@ -0,0 +1,405 @@ +use super::{MeshletGpuScene, MESHLET_MESH_MATERIAL_SHADER_HANDLE}; +use crate::{environment_map::EnvironmentMapLight, irradiance_volume::IrradianceVolume, *}; +use bevy_asset::AssetServer; +use bevy_core_pipeline::{ + core_3d::Camera3d, + prepass::{DeferredPrepass, DepthPrepass, MotionVectorPrepass, NormalPrepass}, + tonemapping::{DebandDither, Tonemapping}, +}; +use bevy_derive::{Deref, DerefMut}; +use bevy_render::{ + camera::TemporalJitter, + mesh::{Mesh, MeshVertexBufferLayout, MeshVertexBufferLayoutRef, MeshVertexBufferLayouts}, + render_resource::*, + view::ExtractedView, +}; +use bevy_utils::HashMap; +use std::hash::Hash; + +/// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletMainOpaquePass3dNode`]. +#[derive(Component, Deref, DerefMut, Default)] +pub struct MeshletViewMaterialsMainOpaquePass(pub Vec<(u32, CachedRenderPipelineId, BindGroup)>); + +/// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletMainOpaquePass3dNode`], +/// and register the material with [`MeshletGpuScene`]. +#[allow(clippy::too_many_arguments)] +pub fn prepare_material_meshlet_meshes_main_opaque_pass( + mut gpu_scene: ResMut, + mut cache: Local>, + pipeline_cache: Res, + material_pipeline: Res>, + mesh_pipeline: Res, + render_materials: Res>, + render_material_instances: Res>, + asset_server: Res, + mut mesh_vertex_buffer_layouts: ResMut, + mut views: Query< + ( + &mut MeshletViewMaterialsMainOpaquePass, + &ExtractedView, + Option<&Tonemapping>, + Option<&DebandDither>, + Option<&ShadowFilteringMethod>, + Has, + ( + Has, + Has, + Has, + Has, + ), + Has, + Option<&Projection>, + Has>, + Has>, + ), + With, + >, +) where + M::Data: PartialEq + Eq + Hash + Clone, +{ + let fake_vertex_buffer_layout = &fake_vertex_buffer_layout(&mut mesh_vertex_buffer_layouts); + + for ( + mut materials, + view, + tonemapping, + dither, + shadow_filter_method, + ssao, + (normal_prepass, depth_prepass, motion_vector_prepass, deferred_prepass), + temporal_jitter, + projection, + has_environment_maps, + has_irradiance_volumes, + ) in &mut views + { + let mut view_key = + MeshPipelineKey::from_msaa_samples(1) | MeshPipelineKey::from_hdr(view.hdr); + + if normal_prepass { + view_key |= MeshPipelineKey::NORMAL_PREPASS; + } + if depth_prepass { + view_key |= MeshPipelineKey::DEPTH_PREPASS; + } + if motion_vector_prepass { + view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS; + } + if deferred_prepass { + view_key |= MeshPipelineKey::DEFERRED_PREPASS; + } + + if temporal_jitter { + view_key |= MeshPipelineKey::TEMPORAL_JITTER; + } + + if has_environment_maps { + view_key |= MeshPipelineKey::ENVIRONMENT_MAP; + } + + if has_irradiance_volumes { + view_key |= MeshPipelineKey::IRRADIANCE_VOLUME; + } + + if let Some(projection) = projection { + view_key |= match projection { + Projection::Perspective(_) => MeshPipelineKey::VIEW_PROJECTION_PERSPECTIVE, + Projection::Orthographic(_) => MeshPipelineKey::VIEW_PROJECTION_ORTHOGRAPHIC, + }; + } + + match shadow_filter_method.unwrap_or(&ShadowFilteringMethod::default()) { + ShadowFilteringMethod::Hardware2x2 => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_HARDWARE_2X2; + } + ShadowFilteringMethod::Castano13 => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_CASTANO_13; + } + ShadowFilteringMethod::Jimenez14 => { + view_key |= MeshPipelineKey::SHADOW_FILTER_METHOD_JIMENEZ_14; + } + } + + if !view.hdr { + if let Some(tonemapping) = tonemapping { + view_key |= MeshPipelineKey::TONEMAP_IN_SHADER; + view_key |= tonemapping_pipeline_key(*tonemapping); + } + if let Some(DebandDither::Enabled) = dither { + view_key |= MeshPipelineKey::DEBAND_DITHER; + } + } + + if ssao { + view_key |= MeshPipelineKey::SCREEN_SPACE_AMBIENT_OCCLUSION; + } + + // TODO: Lightmaps + + view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); + + for material_id in render_material_instances.values() { + let Some(material) = render_materials.get(material_id) else { + continue; + }; + + if material.properties.alpha_mode != AlphaMode::Opaque + || material.properties.reads_view_transmission_texture + { + continue; + } + + let Ok(material_pipeline_descriptor) = material_pipeline.specialize( + MaterialPipelineKey { + mesh_key: view_key, + bind_group_data: material.key.clone(), + }, + fake_vertex_buffer_layout, + ) else { + continue; + }; + let material_fragment = material_pipeline_descriptor.fragment.unwrap(); + + let mut shader_defs = material_fragment.shader_defs; + shader_defs.push("MESHLET_MESH_MATERIAL_PASS".into()); + + let pipeline_descriptor = RenderPipelineDescriptor { + label: material_pipeline_descriptor.label, + layout: vec![ + mesh_pipeline.get_view_layout(view_key.into()).clone(), + gpu_scene.material_draw_bind_group_layout(), + material_pipeline.material_layout.clone(), + ], + push_constant_ranges: vec![], + vertex: VertexState { + shader: MESHLET_MESH_MATERIAL_SHADER_HANDLE, + shader_defs: shader_defs.clone(), + entry_point: material_pipeline_descriptor.vertex.entry_point, + buffers: Vec::new(), + }, + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: TextureFormat::Depth16Unorm, + depth_write_enabled: false, + depth_compare: CompareFunction::Equal, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: match M::meshlet_mesh_fragment_shader() { + ShaderRef::Default => MESHLET_MESH_MATERIAL_SHADER_HANDLE, + ShaderRef::Handle(handle) => handle, + ShaderRef::Path(path) => asset_server.load(path), + }, + shader_defs, + entry_point: material_fragment.entry_point, + targets: material_fragment.targets, + }), + }; + + let material_id = gpu_scene.get_material_id(material_id.untyped()); + + let pipeline_id = *cache.entry(view_key).or_insert_with(|| { + pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) + }); + materials.push((material_id, pipeline_id, material.bind_group.clone())); + } + } +} + +/// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletPrepassNode`]. +#[derive(Component, Deref, DerefMut, Default)] +pub struct MeshletViewMaterialsPrepass(pub Vec<(u32, CachedRenderPipelineId, BindGroup)>); + +/// A list of `(Material ID, Pipeline, BindGroup)` for a view for use in [`super::MeshletDeferredGBufferPrepassNode`]. +#[derive(Component, Deref, DerefMut, Default)] +pub struct MeshletViewMaterialsDeferredGBufferPrepass( + pub Vec<(u32, CachedRenderPipelineId, BindGroup)>, +); + +/// Prepare [`Material`] pipelines for [`super::MeshletMesh`] entities for use in [`super::MeshletPrepassNode`], +/// and [`super::MeshletDeferredGBufferPrepassNode`] and register the material with [`MeshletGpuScene`]. +#[allow(clippy::too_many_arguments)] +pub fn prepare_material_meshlet_meshes_prepass( + mut gpu_scene: ResMut, + mut cache: Local>, + pipeline_cache: Res, + prepass_pipeline: Res>, + render_materials: Res>, + render_material_instances: Res>, + mut mesh_vertex_buffer_layouts: ResMut, + asset_server: Res, + mut views: Query< + ( + &mut MeshletViewMaterialsPrepass, + &mut MeshletViewMaterialsDeferredGBufferPrepass, + &ExtractedView, + AnyOf<(&NormalPrepass, &MotionVectorPrepass, &DeferredPrepass)>, + ), + With, + >, +) where + M::Data: PartialEq + Eq + Hash + Clone, +{ + let fake_vertex_buffer_layout = &fake_vertex_buffer_layout(&mut mesh_vertex_buffer_layouts); + + for ( + mut materials, + mut deferred_materials, + view, + (normal_prepass, motion_vector_prepass, deferred_prepass), + ) in &mut views + { + let mut view_key = + MeshPipelineKey::from_msaa_samples(1) | MeshPipelineKey::from_hdr(view.hdr); + + if normal_prepass.is_some() { + view_key |= MeshPipelineKey::NORMAL_PREPASS; + } + if motion_vector_prepass.is_some() { + view_key |= MeshPipelineKey::MOTION_VECTOR_PREPASS; + } + + view_key |= MeshPipelineKey::from_primitive_topology(PrimitiveTopology::TriangleList); + + for material_id in render_material_instances.values() { + let Some(material) = render_materials.get(material_id) else { + continue; + }; + + if material.properties.alpha_mode != AlphaMode::Opaque + || material.properties.reads_view_transmission_texture + { + continue; + } + + let material_wants_deferred = matches!( + material.properties.render_method, + OpaqueRendererMethod::Deferred + ); + if deferred_prepass.is_some() && material_wants_deferred { + view_key |= MeshPipelineKey::DEFERRED_PREPASS; + } else if normal_prepass.is_none() && motion_vector_prepass.is_none() { + continue; + } + + let Ok(material_pipeline_descriptor) = prepass_pipeline.specialize( + MaterialPipelineKey { + mesh_key: view_key, + bind_group_data: material.key.clone(), + }, + fake_vertex_buffer_layout, + ) else { + continue; + }; + let material_fragment = material_pipeline_descriptor.fragment.unwrap(); + + let mut shader_defs = material_fragment.shader_defs; + shader_defs.push("MESHLET_MESH_MATERIAL_PASS".into()); + + let view_layout = if view_key.contains(MeshPipelineKey::MOTION_VECTOR_PREPASS) { + prepass_pipeline.view_layout_motion_vectors.clone() + } else { + prepass_pipeline.view_layout_no_motion_vectors.clone() + }; + + let fragment_shader = if view_key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + M::meshlet_mesh_deferred_fragment_shader() + } else { + M::meshlet_mesh_prepass_fragment_shader() + }; + + let entry_point = match fragment_shader { + ShaderRef::Default => "prepass_fragment".into(), + _ => material_fragment.entry_point, + }; + + let pipeline_descriptor = RenderPipelineDescriptor { + label: material_pipeline_descriptor.label, + layout: vec![ + view_layout, + gpu_scene.material_draw_bind_group_layout(), + prepass_pipeline.material_layout.clone(), + ], + push_constant_ranges: vec![], + vertex: VertexState { + shader: MESHLET_MESH_MATERIAL_SHADER_HANDLE, + shader_defs: shader_defs.clone(), + entry_point: material_pipeline_descriptor.vertex.entry_point, + buffers: Vec::new(), + }, + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: TextureFormat::Depth16Unorm, + depth_write_enabled: false, + depth_compare: CompareFunction::Equal, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: match fragment_shader { + ShaderRef::Default => MESHLET_MESH_MATERIAL_SHADER_HANDLE, + ShaderRef::Handle(handle) => handle, + ShaderRef::Path(path) => asset_server.load(path), + }, + shader_defs, + entry_point, + targets: material_fragment.targets, + }), + }; + + let material_id = gpu_scene.get_material_id(material_id.untyped()); + + let pipeline_id = *cache.entry(view_key).or_insert_with(|| { + pipeline_cache.queue_render_pipeline(pipeline_descriptor.clone()) + }); + + let item = (material_id, pipeline_id, material.bind_group.clone()); + if view_key.contains(MeshPipelineKey::DEFERRED_PREPASS) { + deferred_materials.push(item); + } else { + materials.push(item); + } + } + } +} + +// Meshlet materials don't use a traditional vertex buffer, but the material specialization requires one. +fn fake_vertex_buffer_layout(layouts: &mut MeshVertexBufferLayouts) -> MeshVertexBufferLayoutRef { + layouts.insert(MeshVertexBufferLayout::new( + vec![ + Mesh::ATTRIBUTE_POSITION.id, + Mesh::ATTRIBUTE_NORMAL.id, + Mesh::ATTRIBUTE_UV_0.id, + Mesh::ATTRIBUTE_TANGENT.id, + ], + VertexBufferLayout { + array_stride: 48, + step_mode: VertexStepMode::Vertex, + attributes: vec![ + VertexAttribute { + format: Mesh::ATTRIBUTE_POSITION.format, + offset: 0, + shader_location: 0, + }, + VertexAttribute { + format: Mesh::ATTRIBUTE_NORMAL.format, + offset: 12, + shader_location: 1, + }, + VertexAttribute { + format: Mesh::ATTRIBUTE_UV_0.format, + offset: 24, + shader_location: 2, + }, + VertexAttribute { + format: Mesh::ATTRIBUTE_TANGENT.format, + offset: 32, + shader_location: 3, + }, + ], + }, + )) +} diff --git a/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl new file mode 100644 index 0000000000000..0d9bc3144345c --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/meshlet_bindings.wgsl @@ -0,0 +1,130 @@ +#define_import_path bevy_pbr::meshlet_bindings + +#import bevy_pbr::mesh_types::Mesh +#import bevy_render::view::View + +struct PackedMeshletVertex { + a: vec4, + b: vec4, + tangent: vec4, +} + +// TODO: Octahedral encode normal, remove tangent and derive from UV derivatives +struct MeshletVertex { + position: vec3, + normal: vec3, + uv: vec2, + tangent: vec4, +} + +fn unpack_meshlet_vertex(packed: PackedMeshletVertex) -> MeshletVertex { + var vertex: MeshletVertex; + vertex.position = packed.a.xyz; + vertex.normal = vec3(packed.a.w, packed.b.xy); + vertex.uv = packed.b.zw; + vertex.tangent = packed.tangent; + return vertex; +} + +struct Meshlet { + start_vertex_id: u32, + start_index_id: u32, + triangle_count: u32, +} + +struct MeshletBoundingSphere { + center: vec3, + radius: f32, +} + +struct DrawIndirectArgs { + vertex_count: atomic, + instance_count: u32, + first_vertex: u32, + first_instance: u32, +} + +#ifdef MESHLET_CULLING_PASS +@group(0) @binding(0) var meshlet_thread_meshlet_ids: array; // Per cluster (instance of a meshlet) +@group(0) @binding(1) var meshlet_bounding_spheres: array; // Per asset meshlet +@group(0) @binding(2) var meshlet_thread_instance_ids: array; // Per cluster (instance of a meshlet) +@group(0) @binding(3) var meshlet_instance_uniforms: array; // Per entity instance +@group(0) @binding(4) var meshlet_view_instance_visibility: array; // 1 bit per entity instance, packed as a bitmask +@group(0) @binding(5) var meshlet_occlusion: array>; // 1 bit per cluster (instance of a meshlet), packed as a bitmask +@group(0) @binding(6) var meshlet_previous_cluster_ids: array; // Per cluster (instance of a meshlet) +@group(0) @binding(7) var meshlet_previous_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask +@group(0) @binding(8) var view: View; +@group(0) @binding(9) var depth_pyramid: texture_2d; // Generated from the first raster pass (unused in the first pass but still bound) + +fn should_cull_instance(instance_id: u32) -> bool { + let bit_offset = instance_id % 32u; + let packed_visibility = meshlet_view_instance_visibility[instance_id / 32u]; + return bool(extractBits(packed_visibility, bit_offset, 1u)); +} + +fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { + let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id]; + let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u]; + let bit_offset = previous_cluster_id % 32u; + return bool(extractBits(packed_occlusion, bit_offset, 1u)); +} +#endif + +#ifdef MESHLET_WRITE_INDEX_BUFFER_PASS +@group(0) @binding(0) var meshlet_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask +@group(0) @binding(1) var meshlet_thread_meshlet_ids: array; // Per cluster (instance of a meshlet) +@group(0) @binding(2) var meshlet_previous_cluster_ids: array; // Per cluster (instance of a meshlet) +@group(0) @binding(3) var meshlet_previous_occlusion: array; // 1 bit per cluster (instance of a meshlet), packed as a bitmask +@group(0) @binding(4) var meshlets: array; // Per asset meshlet +@group(0) @binding(5) var draw_indirect_args: DrawIndirectArgs; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(6) var draw_index_buffer: array; // Single object shared between all workgroups/meshlets/triangles + +fn get_meshlet_occlusion(cluster_id: u32) -> bool { + let packed_occlusion = meshlet_occlusion[cluster_id / 32u]; + let bit_offset = cluster_id % 32u; + return bool(extractBits(packed_occlusion, bit_offset, 1u)); +} + +fn get_meshlet_previous_occlusion(cluster_id: u32) -> bool { + let previous_cluster_id = meshlet_previous_cluster_ids[cluster_id]; + let packed_occlusion = meshlet_previous_occlusion[previous_cluster_id / 32u]; + let bit_offset = previous_cluster_id % 32u; + return bool(extractBits(packed_occlusion, bit_offset, 1u)); +} +#endif + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS +@group(0) @binding(0) var meshlet_thread_meshlet_ids: array; // Per cluster (instance of a meshlet) +@group(0) @binding(1) var meshlets: array; // Per asset meshlet +@group(0) @binding(2) var meshlet_indices: array; // Many per asset meshlet +@group(0) @binding(3) var meshlet_vertex_ids: array; // Many per asset meshlet +@group(0) @binding(4) var meshlet_vertex_data: array; // Many per asset meshlet +@group(0) @binding(5) var meshlet_thread_instance_ids: array; // Per cluster (instance of a meshlet) +@group(0) @binding(6) var meshlet_instance_uniforms: array; // Per entity instance +@group(0) @binding(7) var meshlet_instance_material_ids: array; // Per entity instance +@group(0) @binding(8) var draw_index_buffer: array; // Single object shared between all workgroups/meshlets/triangles +@group(0) @binding(9) var view: View; + +fn get_meshlet_index(index_id: u32) -> u32 { + let packed_index = meshlet_indices[index_id / 4u]; + let bit_offset = (index_id % 4u) * 8u; + return extractBits(packed_index, bit_offset, 8u); +} +#endif + +#ifdef MESHLET_MESH_MATERIAL_PASS +@group(1) @binding(0) var meshlet_visibility_buffer: texture_2d; // Generated from the meshlet raster passes +@group(1) @binding(1) var meshlet_thread_meshlet_ids: array; // Per cluster (instance of a meshlet) +@group(1) @binding(2) var meshlets: array; // Per asset meshlet +@group(1) @binding(3) var meshlet_indices: array; // Many per asset meshlet +@group(1) @binding(4) var meshlet_vertex_ids: array; // Many per asset meshlet +@group(1) @binding(5) var meshlet_vertex_data: array; // Many per asset meshlet +@group(1) @binding(6) var meshlet_thread_instance_ids: array; // Per cluster (instance of a meshlet) +@group(1) @binding(7) var meshlet_instance_uniforms: array; // Per entity instance + +fn get_meshlet_index(index_id: u32) -> u32 { + let packed_index = meshlet_indices[index_id / 4u]; + let bit_offset = (index_id % 4u) * 8u; + return extractBits(packed_index, bit_offset, 8u); +} +#endif diff --git a/crates/bevy_pbr/src/meshlet/meshlet_mesh_material.wgsl b/crates/bevy_pbr/src/meshlet/meshlet_mesh_material.wgsl new file mode 100644 index 0000000000000..ec67868aad0df --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/meshlet_mesh_material.wgsl @@ -0,0 +1,52 @@ +#import bevy_pbr::{ + meshlet_visibility_buffer_resolve::resolve_vertex_output, + view_transformations::uv_to_ndc, + prepass_io, + pbr_prepass_functions, + utils::rand_f, +} + +@vertex +fn vertex(@builtin(vertex_index) vertex_input: u32) -> @builtin(position) vec4 { + let vertex_index = vertex_input % 3u; + let material_id = vertex_input / 3u; + let material_depth = f32(material_id) / 65535.0; + let uv = vec2(vec2(vertex_index >> 1u, vertex_index & 1u)) * 2.0; + return vec4(uv_to_ndc(uv), material_depth, 1.0); +} + +@fragment +fn fragment(@builtin(position) frag_coord: vec4) -> @location(0) vec4 { + let vertex_output = resolve_vertex_output(frag_coord); + var rng = vertex_output.meshlet_id; + let color = vec3(rand_f(&rng), rand_f(&rng), rand_f(&rng)); + return vec4(color, 1.0); +} + +#ifdef PREPASS_FRAGMENT +@fragment +fn prepass_fragment(@builtin(position) frag_coord: vec4) -> prepass_io::FragmentOutput { + let vertex_output = resolve_vertex_output(frag_coord); + + var out: prepass_io::FragmentOutput; + +#ifdef NORMAL_PREPASS + out.normal = vec4(vertex_output.world_normal * 0.5 + vec3(0.5), 1.0); +#endif + +#ifdef MOTION_VECTOR_PREPASS + out.motion_vector = vertex_output.motion_vector; +#endif + +#ifdef DEFERRED_PREPASS + // There isn't any material info available for this default prepass shader so we are just writing  + // emissive magenta out to the deferred gbuffer to be rendered by the first deferred lighting pass layer. + // This is here so if the default prepass fragment is used for deferred magenta will be rendered, and also + // as an example to show that a user could write to the deferred gbuffer if they were to start from this shader. + out.deferred = vec4(0u, bevy_pbr::rgb9e5::vec3_to_rgb9e5_(vec3(1.0, 0.0, 1.0)), 0u, 0u); + out.deferred_lighting_pass_id = 1u; +#endif + + return out; +} +#endif diff --git a/crates/bevy_pbr/src/meshlet/meshlet_preview.png b/crates/bevy_pbr/src/meshlet/meshlet_preview.png new file mode 100644 index 0000000000000..2c319a8987720 Binary files /dev/null and b/crates/bevy_pbr/src/meshlet/meshlet_preview.png differ diff --git a/crates/bevy_pbr/src/meshlet/mod.rs b/crates/bevy_pbr/src/meshlet/mod.rs new file mode 100644 index 0000000000000..128a183c98edc --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/mod.rs @@ -0,0 +1,280 @@ +//! Render high-poly 3d meshes using an efficient GPU-driven method. See [`MeshletPlugin`] and [`MeshletMesh`] for details. + +mod asset; +#[cfg(feature = "meshlet_processor")] +mod from_mesh; +mod gpu_scene; +mod material_draw_nodes; +mod material_draw_prepare; +mod persistent_buffer; +mod persistent_buffer_impls; +mod pipelines; +mod visibility_buffer_raster_node; + +pub mod graph { + use bevy_render::render_graph::RenderLabel; + + #[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)] + pub enum NodeMeshlet { + VisibilityBufferRasterPass, + Prepass, + DeferredPrepass, + MainOpaquePass, + } +} + +pub(crate) use self::{ + gpu_scene::{queue_material_meshlet_meshes, MeshletGpuScene}, + material_draw_prepare::{ + prepare_material_meshlet_meshes_main_opaque_pass, prepare_material_meshlet_meshes_prepass, + }, +}; + +pub use self::asset::{Meshlet, MeshletBoundingSphere, MeshletMesh}; +#[cfg(feature = "meshlet_processor")] +pub use self::from_mesh::MeshToMeshletMeshConversionError; + +use self::{ + asset::MeshletMeshSaverLoad, + gpu_scene::{ + extract_meshlet_meshes, perform_pending_meshlet_mesh_writes, + prepare_meshlet_per_frame_resources, prepare_meshlet_view_bind_groups, + }, + graph::NodeMeshlet, + material_draw_nodes::{ + MeshletDeferredGBufferPrepassNode, MeshletMainOpaquePass3dNode, MeshletPrepassNode, + }, + material_draw_prepare::{ + MeshletViewMaterialsDeferredGBufferPrepass, MeshletViewMaterialsMainOpaquePass, + MeshletViewMaterialsPrepass, + }, + pipelines::{ + MeshletPipelines, MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, MESHLET_CULLING_SHADER_HANDLE, + MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, + }, + visibility_buffer_raster_node::MeshletVisibilityBufferRasterPassNode, +}; +use crate::{graph::NodePbr, Material}; +use bevy_app::{App, Plugin}; +use bevy_asset::{load_internal_asset, AssetApp, Handle}; +use bevy_core_pipeline::{ + core_3d::{ + graph::{Core3d, Node3d}, + Camera3d, + }, + prepass::{DeferredPrepass, MotionVectorPrepass, NormalPrepass}, +}; +use bevy_ecs::{ + bundle::Bundle, + entity::Entity, + query::Has, + schedule::IntoSystemConfigs, + system::{Commands, Query}, +}; +use bevy_render::{ + render_graph::{RenderGraphApp, ViewNodeRunner}, + render_resource::{Shader, TextureUsages}, + view::{prepare_view_targets, InheritedVisibility, Msaa, ViewVisibility, Visibility}, + ExtractSchedule, Render, RenderApp, RenderSet, +}; +use bevy_transform::components::{GlobalTransform, Transform}; + +const MESHLET_BINDINGS_SHADER_HANDLE: Handle = Handle::weak_from_u128(1325134235233421); +const MESHLET_MESH_MATERIAL_SHADER_HANDLE: Handle = + Handle::weak_from_u128(3325134235233421); + +/// Provides a plugin for rendering large amounts of high-poly 3d meshes using an efficient GPU-driven method. See also [`MeshletMesh`]. +/// +/// Rendering dense scenes made of high-poly meshes with thousands or millions of triangles is extremely expensive in Bevy's standard renderer. +/// Once meshes are pre-processed into a [`MeshletMesh`], this plugin can render these kinds of scenes very efficiently. +/// +/// In comparison to Bevy's standard renderer: +/// * Minimal rendering work is done on the CPU. All rendering is GPU-driven. +/// * Much more efficient culling. Meshlets can be culled individually, instead of all or nothing culling for entire meshes at a time. +/// Additionally, occlusion culling can eliminate meshlets that would cause overdraw. +/// * Much more efficient batching. All geometry can be rasterized in a single indirect draw. +/// * Scales better with large amounts of dense geometry and overdraw. Bevy's standard renderer will bottleneck sooner. +/// * Much greater base overhead. Rendering will be slower than Bevy's standard renderer with small amounts of geometry and overdraw. +/// * Much greater memory usage. +/// * Requires preprocessing meshes. See [`MeshletMesh`] for details. +/// * More limitations on the kinds of materials you can use. See [`MeshletMesh`] for details. +/// +/// This plugin is not compatible with [`Msaa`], and adding this plugin will disable it. +/// +/// This plugin does not work on the WebGL2 backend. +/// +/// ![A render of the Stanford dragon as a `MeshletMesh`](https://raw.githubusercontent.com/bevyengine/bevy/meshlet/crates/bevy_pbr/src/meshlet/meshlet_preview.png) +pub struct MeshletPlugin; + +impl Plugin for MeshletPlugin { + fn build(&self, app: &mut App) { + load_internal_asset!( + app, + MESHLET_BINDINGS_SHADER_HANDLE, + "meshlet_bindings.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + super::MESHLET_VISIBILITY_BUFFER_RESOLVE_SHADER_HANDLE, + "visibility_buffer_resolve.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_CULLING_SHADER_HANDLE, + "cull_meshlets.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, + "write_index_buffer.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, + "downsample_depth.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + "visibility_buffer_raster.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_MESH_MATERIAL_SHADER_HANDLE, + "meshlet_mesh_material.wgsl", + Shader::from_wgsl + ); + load_internal_asset!( + app, + MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, + "copy_material_depth.wgsl", + Shader::from_wgsl + ); + + app.init_asset::() + .register_asset_loader(MeshletMeshSaverLoad) + .insert_resource(Msaa::Off); + } + + fn finish(&self, app: &mut App) { + let Ok(render_app) = app.get_sub_app_mut(RenderApp) else { + return; + }; + + render_app + .add_render_graph_node::( + Core3d, + NodeMeshlet::VisibilityBufferRasterPass, + ) + .add_render_graph_node::>( + Core3d, + NodeMeshlet::Prepass, + ) + .add_render_graph_node::>( + Core3d, + NodeMeshlet::DeferredPrepass, + ) + .add_render_graph_node::>( + Core3d, + NodeMeshlet::MainOpaquePass, + ) + .add_render_graph_edges( + Core3d, + ( + NodeMeshlet::VisibilityBufferRasterPass, + NodePbr::ShadowPass, + NodeMeshlet::Prepass, + NodeMeshlet::DeferredPrepass, + Node3d::Prepass, + Node3d::DeferredPrepass, + Node3d::CopyDeferredLightingId, + Node3d::EndPrepasses, + Node3d::StartMainPass, + NodeMeshlet::MainOpaquePass, + Node3d::MainOpaquePass, + Node3d::EndMainPass, + ), + ) + .init_resource::() + .init_resource::() + .add_systems(ExtractSchedule, extract_meshlet_meshes) + .add_systems( + Render, + ( + perform_pending_meshlet_mesh_writes.in_set(RenderSet::PrepareAssets), + configure_meshlet_views + .after(prepare_view_targets) + .in_set(RenderSet::ManageViews), + prepare_meshlet_per_frame_resources.in_set(RenderSet::PrepareResources), + prepare_meshlet_view_bind_groups.in_set(RenderSet::PrepareBindGroups), + ), + ); + } +} + +/// A component bundle for entities with a [`MeshletMesh`] and a [`Material`]. +#[derive(Bundle, Clone)] +pub struct MaterialMeshletMeshBundle { + pub meshlet_mesh: Handle, + pub material: Handle, + pub transform: Transform, + pub global_transform: GlobalTransform, + /// User indication of whether an entity is visible + pub visibility: Visibility, + /// Inherited visibility of an entity. + pub inherited_visibility: InheritedVisibility, + /// Algorithmically-computed indication of whether an entity is visible and should be extracted for rendering + pub view_visibility: ViewVisibility, +} + +impl Default for MaterialMeshletMeshBundle { + fn default() -> Self { + Self { + meshlet_mesh: Default::default(), + material: Default::default(), + transform: Default::default(), + global_transform: Default::default(), + visibility: Default::default(), + inherited_visibility: Default::default(), + view_visibility: Default::default(), + } + } +} + +fn configure_meshlet_views( + mut views_3d: Query<( + Entity, + &mut Camera3d, + Has, + Has, + Has, + )>, + mut commands: Commands, +) { + for (entity, mut camera_3d, normal_prepass, motion_vector_prepass, deferred_prepass) in + &mut views_3d + { + let mut usages: TextureUsages = camera_3d.depth_texture_usages.into(); + usages |= TextureUsages::TEXTURE_BINDING; + camera_3d.depth_texture_usages = usages.into(); + + if !(normal_prepass || motion_vector_prepass || deferred_prepass) { + commands + .entity(entity) + .insert(MeshletViewMaterialsMainOpaquePass::default()); + } else { + commands.entity(entity).insert(( + MeshletViewMaterialsMainOpaquePass::default(), + MeshletViewMaterialsPrepass::default(), + MeshletViewMaterialsDeferredGBufferPrepass::default(), + )); + } + } +} diff --git a/crates/bevy_pbr/src/meshlet/persistent_buffer.rs b/crates/bevy_pbr/src/meshlet/persistent_buffer.rs new file mode 100644 index 0000000000000..eccce560dca55 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/persistent_buffer.rs @@ -0,0 +1,124 @@ +use bevy_render::{ + render_resource::{ + BindingResource, Buffer, BufferAddress, BufferDescriptor, BufferUsages, + CommandEncoderDescriptor, + }, + renderer::{RenderDevice, RenderQueue}, +}; +use range_alloc::RangeAllocator; +use std::{num::NonZeroU64, ops::Range}; + +/// Wrapper for a GPU buffer holding a large amount of data that persists across frames. +pub struct PersistentGpuBuffer { + /// Debug label for the buffer. + label: &'static str, + /// Handle to the GPU buffer. + buffer: Buffer, + /// Tracks free slices of the buffer. + allocation_planner: RangeAllocator, + /// Queue of pending writes, and associated metadata. + write_queue: Vec<(T, T::Metadata, Range)>, +} + +impl PersistentGpuBuffer { + /// Create a new persistent buffer. + pub fn new(label: &'static str, render_device: &RenderDevice) -> Self { + Self { + label, + buffer: render_device.create_buffer(&BufferDescriptor { + label: Some(label), + size: 0, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST | BufferUsages::COPY_SRC, + mapped_at_creation: false, + }), + allocation_planner: RangeAllocator::new(0..0), + write_queue: Vec::new(), + } + } + + /// Queue an item of type T to be added to the buffer, returning the byte range within the buffer that it will be located at. + pub fn queue_write(&mut self, data: T, metadata: T::Metadata) -> Range { + let data_size = data.size_in_bytes() as u64; + if let Ok(buffer_slice) = self.allocation_planner.allocate_range(data_size) { + self.write_queue + .push((data, metadata, buffer_slice.clone())); + return buffer_slice; + } + + let buffer_size = self.allocation_planner.initial_range(); + let double_buffer_size = (buffer_size.end - buffer_size.start) * 2; + let new_size = double_buffer_size.max(data_size); + self.allocation_planner.grow_to(buffer_size.end + new_size); + + let buffer_slice = self.allocation_planner.allocate_range(data_size).unwrap(); + self.write_queue + .push((data, metadata, buffer_slice.clone())); + buffer_slice + } + + /// Upload all pending data to the GPU buffer. + pub fn perform_writes(&mut self, render_queue: &RenderQueue, render_device: &RenderDevice) { + if self.allocation_planner.initial_range().end > self.buffer.size() { + self.expand_buffer(render_device, render_queue); + } + + let queue_count = self.write_queue.len(); + + for (data, metadata, buffer_slice) in self.write_queue.drain(..) { + let buffer_slice_size = NonZeroU64::new(buffer_slice.end - buffer_slice.start).unwrap(); + let mut buffer_view = render_queue + .write_buffer_with(&self.buffer, buffer_slice.start, buffer_slice_size) + .unwrap(); + data.write_bytes_le(metadata, &mut buffer_view); + } + + let queue_saturation = queue_count as f32 / self.write_queue.capacity() as f32; + if queue_saturation < 0.3 { + self.write_queue = Vec::new(); + } + } + + /// Mark a section of the GPU buffer as no longer needed. + pub fn mark_slice_unused(&mut self, buffer_slice: Range) { + self.allocation_planner.free_range(buffer_slice); + } + + pub fn binding(&self) -> BindingResource<'_> { + self.buffer.as_entire_binding() + } + + /// Expand the buffer by creating a new buffer and copying old data over. + fn expand_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + let size = self.allocation_planner.initial_range(); + let new_buffer = render_device.create_buffer(&BufferDescriptor { + label: Some(self.label), + size: size.end - size.start, + usage: BufferUsages::STORAGE | BufferUsages::COPY_DST | BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let mut command_encoder = render_device.create_command_encoder(&CommandEncoderDescriptor { + label: Some("persistent_gpu_buffer_expand"), + }); + command_encoder.copy_buffer_to_buffer(&self.buffer, 0, &new_buffer, 0, self.buffer.size()); + render_queue.submit([command_encoder.finish()]); + + self.buffer = new_buffer; + } +} + +/// A trait representing data that can be written to a [`PersistentGpuBuffer`]. +/// +/// # Safety +/// * All data must be a multiple of `wgpu::COPY_BUFFER_ALIGNMENT` bytes. +/// * The amount of bytes written to `buffer` in `write_bytes_le()` must match `size_in_bytes()`. +pub unsafe trait PersistentGpuBufferable { + /// Additional metadata associated with each item, made available during `write_bytes_le`. + type Metadata; + + /// The size in bytes of `self`. + fn size_in_bytes(&self) -> usize; + + /// Convert `self` + `metadata` into bytes (little-endian), and write to the provided buffer slice. + fn write_bytes_le(&self, metadata: Self::Metadata, buffer_slice: &mut [u8]); +} diff --git a/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs b/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs new file mode 100644 index 0000000000000..0567246b3543f --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/persistent_buffer_impls.rs @@ -0,0 +1,77 @@ +#![allow(clippy::undocumented_unsafe_blocks)] + +use super::{persistent_buffer::PersistentGpuBufferable, Meshlet, MeshletBoundingSphere}; +use std::{mem::size_of, sync::Arc}; + +const MESHLET_VERTEX_SIZE_IN_BYTES: u32 = 48; + +unsafe impl PersistentGpuBufferable for Arc<[u8]> { + type Metadata = (); + + fn size_in_bytes(&self) -> usize { + self.len() + } + + fn write_bytes_le(&self, _: Self::Metadata, buffer_slice: &mut [u8]) { + buffer_slice.clone_from_slice(self); + } +} + +unsafe impl PersistentGpuBufferable for Arc<[u32]> { + type Metadata = u64; + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le(&self, offset: Self::Metadata, buffer_slice: &mut [u8]) { + let offset = offset as u32 / MESHLET_VERTEX_SIZE_IN_BYTES; + + for (i, index) in self.iter().enumerate() { + let size = size_of::(); + let i = i * size; + let bytes = (*index + offset).to_le_bytes(); + buffer_slice[i..(i + size)].clone_from_slice(&bytes); + } + } +} + +unsafe impl PersistentGpuBufferable for Arc<[Meshlet]> { + type Metadata = (u64, u64); + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le( + &self, + (vertex_offset, index_offset): Self::Metadata, + buffer_slice: &mut [u8], + ) { + let vertex_offset = (vertex_offset as usize / size_of::()) as u32; + let index_offset = index_offset as u32; + + for (i, meshlet) in self.iter().enumerate() { + let size = size_of::(); + let i = i * size; + let bytes = bytemuck::cast::<_, [u8; size_of::()]>(Meshlet { + start_vertex_id: meshlet.start_vertex_id + vertex_offset, + start_index_id: meshlet.start_index_id + index_offset, + triangle_count: meshlet.triangle_count, + }); + buffer_slice[i..(i + size)].clone_from_slice(&bytes); + } + } +} + +unsafe impl PersistentGpuBufferable for Arc<[MeshletBoundingSphere]> { + type Metadata = (); + + fn size_in_bytes(&self) -> usize { + self.len() * size_of::() + } + + fn write_bytes_le(&self, _: Self::Metadata, buffer_slice: &mut [u8]) { + buffer_slice.clone_from_slice(bytemuck::cast_slice(self)); + } +} diff --git a/crates/bevy_pbr/src/meshlet/pipelines.rs b/crates/bevy_pbr/src/meshlet/pipelines.rs new file mode 100644 index 0000000000000..1452905d7bc7d --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/pipelines.rs @@ -0,0 +1,295 @@ +use super::gpu_scene::MeshletGpuScene; +use bevy_asset::Handle; +use bevy_core_pipeline::{ + core_3d::CORE_3D_DEPTH_FORMAT, fullscreen_vertex_shader::fullscreen_shader_vertex_state, +}; +use bevy_ecs::{ + system::Resource, + world::{FromWorld, World}, +}; +use bevy_render::render_resource::*; + +pub const MESHLET_CULLING_SHADER_HANDLE: Handle = Handle::weak_from_u128(4325134235233421); +pub const MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE: Handle = + Handle::weak_from_u128(5325134235233421); +pub const MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE: Handle = + Handle::weak_from_u128(6325134235233421); +pub const MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE: Handle = + Handle::weak_from_u128(7325134235233421); +pub const MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE: Handle = + Handle::weak_from_u128(8325134235233421); + +#[derive(Resource)] +pub struct MeshletPipelines { + cull_first: CachedComputePipelineId, + cull_second: CachedComputePipelineId, + write_index_buffer_first: CachedComputePipelineId, + write_index_buffer_second: CachedComputePipelineId, + downsample_depth: CachedRenderPipelineId, + visibility_buffer_raster: CachedRenderPipelineId, + visibility_buffer_raster_depth_only: CachedRenderPipelineId, + visibility_buffer_raster_depth_only_clamp_ortho: CachedRenderPipelineId, + copy_material_depth: CachedRenderPipelineId, +} + +impl FromWorld for MeshletPipelines { + fn from_world(world: &mut World) -> Self { + let gpu_scene = world.resource::(); + let cull_layout = gpu_scene.culling_bind_group_layout(); + let write_index_buffer_layout = gpu_scene.write_index_buffer_bind_group_layout(); + let downsample_depth_layout = gpu_scene.downsample_depth_bind_group_layout(); + let visibility_buffer_layout = gpu_scene.visibility_buffer_raster_bind_group_layout(); + let copy_material_depth_layout = gpu_scene.copy_material_depth_bind_group_layout(); + let pipeline_cache = world.resource_mut::(); + + Self { + cull_first: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_culling_first_pipeline".into()), + layout: vec![cull_layout.clone()], + push_constant_ranges: vec![], + shader: MESHLET_CULLING_SHADER_HANDLE, + shader_defs: vec!["MESHLET_CULLING_PASS".into()], + entry_point: "cull_meshlets".into(), + }), + + cull_second: pipeline_cache.queue_compute_pipeline(ComputePipelineDescriptor { + label: Some("meshlet_culling_second_pipeline".into()), + layout: vec![cull_layout], + push_constant_ranges: vec![], + shader: MESHLET_CULLING_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_CULLING_PASS".into(), + "MESHLET_SECOND_CULLING_PASS".into(), + ], + entry_point: "cull_meshlets".into(), + }), + + write_index_buffer_first: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_write_index_buffer_first_pipeline".into()), + layout: vec![write_index_buffer_layout.clone()], + push_constant_ranges: vec![], + shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, + shader_defs: vec!["MESHLET_WRITE_INDEX_BUFFER_PASS".into()], + entry_point: "write_index_buffer".into(), + }, + ), + + write_index_buffer_second: pipeline_cache.queue_compute_pipeline( + ComputePipelineDescriptor { + label: Some("meshlet_write_index_buffer_second_pipeline".into()), + layout: vec![write_index_buffer_layout], + push_constant_ranges: vec![], + shader: MESHLET_WRITE_INDEX_BUFFER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_WRITE_INDEX_BUFFER_PASS".into(), + "MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS".into(), + ], + entry_point: "write_index_buffer".into(), + }, + ), + + downsample_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("meshlet_downsample_depth".into()), + layout: vec![downsample_depth_layout], + push_constant_ranges: vec![], + vertex: fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: None, + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_DOWNSAMPLE_DEPTH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "downsample_depth".into(), + targets: vec![Some(ColorTargetState { + format: TextureFormat::R32Float, + blend: None, + write_mask: ColorWrites::ALL, + })], + }), + }), + + visibility_buffer_raster: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_visibility_buffer_raster_pipeline".into()), + layout: vec![visibility_buffer_layout.clone()], + push_constant_ranges: vec![], + vertex: VertexState { + shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + ], + entry_point: "vertex".into(), + buffers: vec![], + }, + primitive: PrimitiveState { + topology: PrimitiveTopology::TriangleList, + strip_index_format: None, + front_face: FrontFace::Ccw, + cull_mode: None, + unclipped_depth: false, + polygon_mode: PolygonMode::Fill, + conservative: false, + }, + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT".into(), + ], + entry_point: "fragment".into(), + targets: vec![ + Some(ColorTargetState { + format: TextureFormat::R32Uint, + blend: None, + write_mask: ColorWrites::ALL, + }), + Some(ColorTargetState { + format: TextureFormat::R16Uint, + blend: None, + write_mask: ColorWrites::ALL, + }), + ], + }), + }, + ), + + visibility_buffer_raster_depth_only: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("meshlet_visibility_buffer_raster_depth_only_pipeline".into()), + layout: vec![visibility_buffer_layout.clone()], + push_constant_ranges: vec![], + vertex: VertexState { + shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader_defs: vec!["MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into()], + entry_point: "vertex".into(), + buffers: vec![], + }, + primitive: PrimitiveState { + topology: PrimitiveTopology::TriangleList, + strip_index_format: None, + front_face: FrontFace::Ccw, + cull_mode: None, + unclipped_depth: false, + polygon_mode: PolygonMode::Fill, + conservative: false, + }, + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: None, + }, + ), + + visibility_buffer_raster_depth_only_clamp_ortho: pipeline_cache.queue_render_pipeline( + RenderPipelineDescriptor { + label: Some("visibility_buffer_raster_depth_only_clamp_ortho_pipeline".into()), + layout: vec![visibility_buffer_layout], + push_constant_ranges: vec![], + vertex: VertexState { + shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "DEPTH_CLAMP_ORTHO".into(), + ], + entry_point: "vertex".into(), + buffers: vec![], + }, + primitive: PrimitiveState { + topology: PrimitiveTopology::TriangleList, + strip_index_format: None, + front_face: FrontFace::Ccw, + cull_mode: None, + unclipped_depth: false, + polygon_mode: PolygonMode::Fill, + conservative: false, + }, + depth_stencil: Some(DepthStencilState { + format: CORE_3D_DEPTH_FORMAT, + depth_write_enabled: true, + depth_compare: CompareFunction::GreaterEqual, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_VISIBILITY_BUFFER_RASTER_SHADER_HANDLE, + shader_defs: vec![ + "MESHLET_VISIBILITY_BUFFER_RASTER_PASS".into(), + "DEPTH_CLAMP_ORTHO".into(), + ], + entry_point: "fragment".into(), + targets: vec![], + }), + }, + ), + + copy_material_depth: pipeline_cache.queue_render_pipeline(RenderPipelineDescriptor { + label: Some("meshlet_copy_material_depth".into()), + layout: vec![copy_material_depth_layout], + push_constant_ranges: vec![], + vertex: fullscreen_shader_vertex_state(), + primitive: PrimitiveState::default(), + depth_stencil: Some(DepthStencilState { + format: TextureFormat::Depth16Unorm, + depth_write_enabled: true, + depth_compare: CompareFunction::Always, + stencil: StencilState::default(), + bias: DepthBiasState::default(), + }), + multisample: MultisampleState::default(), + fragment: Some(FragmentState { + shader: MESHLET_COPY_MATERIAL_DEPTH_SHADER_HANDLE, + shader_defs: vec![], + entry_point: "copy_material_depth".into(), + targets: vec![], + }), + }), + } + } +} + +impl MeshletPipelines { + pub fn get( + world: &World, + ) -> Option<( + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &ComputePipeline, + &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + &RenderPipeline, + )> { + let pipeline_cache = world.get_resource::()?; + let pipeline = world.get_resource::()?; + Some(( + pipeline_cache.get_compute_pipeline(pipeline.cull_first)?, + pipeline_cache.get_compute_pipeline(pipeline.cull_second)?, + pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_first)?, + pipeline_cache.get_compute_pipeline(pipeline.write_index_buffer_second)?, + pipeline_cache.get_render_pipeline(pipeline.downsample_depth)?, + pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster)?, + pipeline_cache.get_render_pipeline(pipeline.visibility_buffer_raster_depth_only)?, + pipeline_cache + .get_render_pipeline(pipeline.visibility_buffer_raster_depth_only_clamp_ortho)?, + pipeline_cache.get_render_pipeline(pipeline.copy_material_depth)?, + )) + } +} diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl new file mode 100644 index 0000000000000..dde6d2655dd7c --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster.wgsl @@ -0,0 +1,88 @@ +#import bevy_pbr::{ + meshlet_bindings::{ + meshlet_thread_meshlet_ids, + meshlets, + meshlet_vertex_ids, + meshlet_vertex_data, + meshlet_thread_instance_ids, + meshlet_instance_uniforms, + meshlet_instance_material_ids, + draw_index_buffer, + view, + get_meshlet_index, + unpack_meshlet_vertex, + }, + mesh_functions::mesh_position_local_to_world, +} +#import bevy_render::maths::affine3_to_square + +/// Vertex/fragment shader for rasterizing meshlets into a visibility buffer. + +struct VertexOutput { + @builtin(position) clip_position: vec4, +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + @location(0) @interpolate(flat) visibility: u32, + @location(1) @interpolate(flat) material_depth: u32, +#endif +#ifdef DEPTH_CLAMP_ORTHO + @location(0) unclamped_clip_depth: f32, +#endif +} + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +struct FragmentOutput { + @location(0) visibility: vec4, + @location(1) material_depth: vec4, +} +#endif + +@vertex +fn vertex(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { + let packed_ids = draw_index_buffer[vertex_index / 3u]; + let cluster_id = packed_ids >> 8u; + let triangle_id = extractBits(packed_ids, 0u, 8u); + let index_id = (triangle_id * 3u) + (vertex_index % 3u); + let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; + let meshlet = meshlets[meshlet_id]; + let index = get_meshlet_index(meshlet.start_index_id + index_id); + let vertex_id = meshlet_vertex_ids[meshlet.start_vertex_id + index]; + let vertex = unpack_meshlet_vertex(meshlet_vertex_data[vertex_id]); + let instance_id = meshlet_thread_instance_ids[cluster_id]; + let instance_uniform = meshlet_instance_uniforms[instance_id]; + + let model = affine3_to_square(instance_uniform.model); + let world_position = mesh_position_local_to_world(model, vec4(vertex.position, 1.0)); + var clip_position = view.view_proj * vec4(world_position.xyz, 1.0); +#ifdef DEPTH_CLAMP_ORTHO + let unclamped_clip_depth = clip_position.z; + clip_position.z = min(clip_position.z, 1.0); +#endif + + return VertexOutput( + clip_position, +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT + packed_ids, + meshlet_instance_material_ids[instance_id], +#endif +#ifdef DEPTH_CLAMP_ORTHO + unclamped_clip_depth, +#endif + ); +} + +#ifdef MESHLET_VISIBILITY_BUFFER_RASTER_PASS_OUTPUT +@fragment +fn fragment(vertex_output: VertexOutput) -> FragmentOutput { + return FragmentOutput( + vec4(vertex_output.visibility, 0u, 0u, 0u), + vec4(vertex_output.material_depth, 0u, 0u, 0u), + ); +} +#endif + +#ifdef DEPTH_CLAMP_ORTHO +@fragment +fn fragment(vertex_output: VertexOutput) -> @builtin(frag_depth) f32 { + return vertex_output.unclamped_clip_depth; +} +#endif diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs new file mode 100644 index 0000000000000..965a4135b21d6 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_raster_node.rs @@ -0,0 +1,448 @@ +use super::{ + gpu_scene::{MeshletViewBindGroups, MeshletViewResources}, + pipelines::MeshletPipelines, +}; +use crate::{LightEntity, ShadowView, ViewLightEntities}; +use bevy_color::LinearRgba; +use bevy_ecs::{ + query::QueryState, + world::{FromWorld, World}, +}; +use bevy_render::{ + camera::ExtractedCamera, + render_graph::{Node, NodeRunError, RenderGraphContext}, + render_resource::*, + renderer::RenderContext, + view::{ViewDepthTexture, ViewUniformOffset}, +}; + +/// Rasterize meshlets into a depth buffer, and optional visibility buffer + material depth buffer for shading passes. +pub struct MeshletVisibilityBufferRasterPassNode { + main_view_query: QueryState<( + &'static ExtractedCamera, + &'static ViewDepthTexture, + &'static ViewUniformOffset, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + &'static ViewLightEntities, + )>, + view_light_query: QueryState<( + &'static ShadowView, + &'static LightEntity, + &'static ViewUniformOffset, + &'static MeshletViewBindGroups, + &'static MeshletViewResources, + )>, +} + +impl FromWorld for MeshletVisibilityBufferRasterPassNode { + fn from_world(world: &mut World) -> Self { + Self { + main_view_query: QueryState::new(world), + view_light_query: QueryState::new(world), + } + } +} + +impl Node for MeshletVisibilityBufferRasterPassNode { + fn update(&mut self, world: &mut World) { + self.main_view_query.update_archetypes(world); + self.view_light_query.update_archetypes(world); + } + + fn run( + &self, + graph: &mut RenderGraphContext, + render_context: &mut RenderContext, + world: &World, + ) -> Result<(), NodeRunError> { + let Ok(( + camera, + view_depth, + view_offset, + meshlet_view_bind_groups, + meshlet_view_resources, + lights, + )) = self.main_view_query.get_manual(world, graph.view_entity()) + else { + return Ok(()); + }; + + let Some(( + culling_first_pipeline, + culling_second_pipeline, + write_index_buffer_first_pipeline, + write_index_buffer_second_pipeline, + downsample_depth_pipeline, + visibility_buffer_raster_pipeline, + visibility_buffer_raster_depth_only_pipeline, + visibility_buffer_raster_depth_only_clamp_ortho, + copy_material_depth_pipeline, + )) = MeshletPipelines::get(world) + else { + return Ok(()); + }; + + let culling_workgroups = meshlet_view_resources.scene_meshlet_count.div_ceil(128); + let write_index_buffer_workgroups = (meshlet_view_resources.scene_meshlet_count as f32) + .cbrt() + .ceil() as u32; + + render_context + .command_encoder() + .push_debug_group("meshlet_visibility_buffer_raster_pass"); + if meshlet_view_resources.occlusion_buffer_needs_clearing { + render_context.command_encoder().clear_buffer( + &meshlet_view_resources.occlusion_buffer, + 0, + None, + ); + } + cull_pass( + "meshlet_culling_first_pass", + render_context, + meshlet_view_bind_groups, + view_offset, + culling_first_pipeline, + culling_workgroups, + ); + write_index_buffer_pass( + "meshlet_write_index_buffer_first_pass", + render_context, + &meshlet_view_bind_groups.write_index_buffer_first, + write_index_buffer_first_pipeline, + write_index_buffer_workgroups, + ); + render_context.command_encoder().clear_buffer( + &meshlet_view_resources.occlusion_buffer, + 0, + None, + ); + raster_pass( + true, + render_context, + meshlet_view_resources, + &meshlet_view_resources.visibility_buffer_draw_indirect_args_first, + view_depth.get_attachment(StoreOp::Store), + meshlet_view_bind_groups, + view_offset, + visibility_buffer_raster_pipeline, + Some(camera), + ); + downsample_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + downsample_depth_pipeline, + ); + cull_pass( + "meshlet_culling_second_pass", + render_context, + meshlet_view_bind_groups, + view_offset, + culling_second_pipeline, + culling_workgroups, + ); + write_index_buffer_pass( + "meshlet_write_index_buffer_second_pass", + render_context, + &meshlet_view_bind_groups.write_index_buffer_second, + write_index_buffer_second_pipeline, + write_index_buffer_workgroups, + ); + raster_pass( + false, + render_context, + meshlet_view_resources, + &meshlet_view_resources.visibility_buffer_draw_indirect_args_second, + view_depth.get_attachment(StoreOp::Store), + meshlet_view_bind_groups, + view_offset, + visibility_buffer_raster_pipeline, + Some(camera), + ); + copy_material_depth_pass( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + copy_material_depth_pipeline, + camera, + ); + render_context.command_encoder().pop_debug_group(); + + for light_entity in &lights.lights { + let Ok(( + shadow_view, + light_type, + view_offset, + meshlet_view_bind_groups, + meshlet_view_resources, + )) = self.view_light_query.get_manual(world, *light_entity) + else { + continue; + }; + + let shadow_visibility_buffer_pipeline = match light_type { + LightEntity::Directional { .. } => visibility_buffer_raster_depth_only_clamp_ortho, + _ => visibility_buffer_raster_depth_only_pipeline, + }; + + render_context.command_encoder().push_debug_group(&format!( + "meshlet_visibility_buffer_raster_pass: {}", + shadow_view.pass_name + )); + if meshlet_view_resources.occlusion_buffer_needs_clearing { + render_context.command_encoder().clear_buffer( + &meshlet_view_resources.occlusion_buffer, + 0, + None, + ); + } + cull_pass( + "meshlet_culling_first_pass", + render_context, + meshlet_view_bind_groups, + view_offset, + culling_first_pipeline, + culling_workgroups, + ); + write_index_buffer_pass( + "meshlet_write_index_buffer_first_pass", + render_context, + &meshlet_view_bind_groups.write_index_buffer_first, + write_index_buffer_first_pipeline, + write_index_buffer_workgroups, + ); + render_context.command_encoder().clear_buffer( + &meshlet_view_resources.occlusion_buffer, + 0, + None, + ); + raster_pass( + true, + render_context, + meshlet_view_resources, + &meshlet_view_resources.visibility_buffer_draw_indirect_args_first, + shadow_view.depth_attachment.get_attachment(StoreOp::Store), + meshlet_view_bind_groups, + view_offset, + shadow_visibility_buffer_pipeline, + None, + ); + downsample_depth( + render_context, + meshlet_view_resources, + meshlet_view_bind_groups, + downsample_depth_pipeline, + ); + cull_pass( + "meshlet_culling_second_pass", + render_context, + meshlet_view_bind_groups, + view_offset, + culling_second_pipeline, + culling_workgroups, + ); + write_index_buffer_pass( + "meshlet_write_index_buffer_second_pass", + render_context, + &meshlet_view_bind_groups.write_index_buffer_second, + write_index_buffer_second_pipeline, + write_index_buffer_workgroups, + ); + raster_pass( + false, + render_context, + meshlet_view_resources, + &meshlet_view_resources.visibility_buffer_draw_indirect_args_second, + shadow_view.depth_attachment.get_attachment(StoreOp::Store), + meshlet_view_bind_groups, + view_offset, + shadow_visibility_buffer_pipeline, + None, + ); + render_context.command_encoder().pop_debug_group(); + } + + Ok(()) + } +} + +fn cull_pass( + label: &'static str, + render_context: &mut RenderContext, + meshlet_view_bind_groups: &MeshletViewBindGroups, + view_offset: &ViewUniformOffset, + culling_pipeline: &ComputePipeline, + culling_workgroups: u32, +) { + let command_encoder = render_context.command_encoder(); + let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some(label), + timestamp_writes: None, + }); + cull_pass.set_bind_group(0, &meshlet_view_bind_groups.culling, &[view_offset.offset]); + cull_pass.set_pipeline(culling_pipeline); + cull_pass.dispatch_workgroups(culling_workgroups, 1, 1); +} + +fn write_index_buffer_pass( + label: &'static str, + render_context: &mut RenderContext, + write_index_buffer_bind_group: &BindGroup, + write_index_buffer_pipeline: &ComputePipeline, + write_index_buffer_workgroups: u32, +) { + let command_encoder = render_context.command_encoder(); + let mut cull_pass = command_encoder.begin_compute_pass(&ComputePassDescriptor { + label: Some(label), + timestamp_writes: None, + }); + cull_pass.set_bind_group(0, write_index_buffer_bind_group, &[]); + cull_pass.set_pipeline(write_index_buffer_pipeline); + cull_pass.dispatch_workgroups( + write_index_buffer_workgroups, + write_index_buffer_workgroups, + write_index_buffer_workgroups, + ); +} + +#[allow(clippy::too_many_arguments)] +fn raster_pass( + first_pass: bool, + render_context: &mut RenderContext, + meshlet_view_resources: &MeshletViewResources, + visibility_buffer_draw_indirect_args: &Buffer, + depth_stencil_attachment: RenderPassDepthStencilAttachment, + meshlet_view_bind_groups: &MeshletViewBindGroups, + view_offset: &ViewUniformOffset, + visibility_buffer_raster_pipeline: &RenderPipeline, + camera: Option<&ExtractedCamera>, +) { + let mut color_attachments_filled = [None, None]; + if let (Some(visibility_buffer), Some(material_depth_color)) = ( + meshlet_view_resources.visibility_buffer.as_ref(), + meshlet_view_resources.material_depth_color.as_ref(), + ) { + let load = if first_pass { + LoadOp::Clear(LinearRgba::BLACK.into()) + } else { + LoadOp::Load + }; + color_attachments_filled = [ + Some(RenderPassColorAttachment { + view: &visibility_buffer.default_view, + resolve_target: None, + ops: Operations { + load, + store: StoreOp::Store, + }, + }), + Some(RenderPassColorAttachment { + view: &material_depth_color.default_view, + resolve_target: None, + ops: Operations { + load, + store: StoreOp::Store, + }, + }), + ]; + } + + let mut draw_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some(if first_pass { + "meshlet_visibility_buffer_raster_first_pass" + } else { + "meshlet_visibility_buffer_raster_second_pass" + }), + color_attachments: if color_attachments_filled[0].is_none() { + &[] + } else { + &color_attachments_filled + }, + depth_stencil_attachment: Some(depth_stencil_attachment), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = camera.and_then(|camera| camera.viewport.as_ref()) { + draw_pass.set_camera_viewport(viewport); + } + + draw_pass.set_bind_group( + 0, + &meshlet_view_bind_groups.visibility_buffer_raster, + &[view_offset.offset], + ); + draw_pass.set_render_pipeline(visibility_buffer_raster_pipeline); + draw_pass.draw_indirect(visibility_buffer_draw_indirect_args, 0); +} + +fn downsample_depth( + render_context: &mut RenderContext, + meshlet_view_resources: &MeshletViewResources, + meshlet_view_bind_groups: &MeshletViewBindGroups, + downsample_depth_pipeline: &RenderPipeline, +) { + render_context + .command_encoder() + .push_debug_group("meshlet_downsample_depth"); + + for i in 0..meshlet_view_resources.depth_pyramid_mips.len() { + let downsample_pass = RenderPassDescriptor { + label: Some("meshlet_downsample_depth_pass"), + color_attachments: &[Some(RenderPassColorAttachment { + view: &meshlet_view_resources.depth_pyramid_mips[i], + resolve_target: None, + ops: Operations { + load: LoadOp::Clear(LinearRgba::BLACK.into()), + store: StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }; + + let mut downsample_pass = render_context.begin_tracked_render_pass(downsample_pass); + downsample_pass.set_bind_group(0, &meshlet_view_bind_groups.downsample_depth[i], &[]); + downsample_pass.set_render_pipeline(downsample_depth_pipeline); + downsample_pass.draw(0..3, 0..1); + } + + render_context.command_encoder().pop_debug_group(); +} + +fn copy_material_depth_pass( + render_context: &mut RenderContext, + meshlet_view_resources: &MeshletViewResources, + meshlet_view_bind_groups: &MeshletViewBindGroups, + copy_material_depth_pipeline: &RenderPipeline, + camera: &ExtractedCamera, +) { + if let (Some(material_depth), Some(copy_material_depth_bind_group)) = ( + meshlet_view_resources.material_depth.as_ref(), + meshlet_view_bind_groups.copy_material_depth.as_ref(), + ) { + let mut copy_pass = render_context.begin_tracked_render_pass(RenderPassDescriptor { + label: Some("meshlet_copy_material_depth_pass"), + color_attachments: &[], + depth_stencil_attachment: Some(RenderPassDepthStencilAttachment { + view: &material_depth.default_view, + depth_ops: Some(Operations { + load: LoadOp::Clear(0.0), + store: StoreOp::Store, + }), + stencil_ops: None, + }), + timestamp_writes: None, + occlusion_query_set: None, + }); + if let Some(viewport) = &camera.viewport { + copy_pass.set_camera_viewport(viewport); + } + + copy_pass.set_bind_group(0, copy_material_depth_bind_group, &[]); + copy_pass.set_render_pipeline(copy_material_depth_pipeline); + copy_pass.draw(0..3, 0..1); + } +} diff --git a/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl new file mode 100644 index 0000000000000..0325ba96b9713 --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/visibility_buffer_resolve.wgsl @@ -0,0 +1,186 @@ +#define_import_path bevy_pbr::meshlet_visibility_buffer_resolve + +#import bevy_pbr::{ + meshlet_bindings::{ + meshlet_visibility_buffer, + meshlet_thread_meshlet_ids, + meshlets, + meshlet_vertex_ids, + meshlet_vertex_data, + meshlet_thread_instance_ids, + meshlet_instance_uniforms, + get_meshlet_index, + unpack_meshlet_vertex, + }, + mesh_view_bindings::view, + mesh_functions::mesh_position_local_to_world, + mesh_types::MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT, + view_transformations::{position_world_to_clip, frag_coord_to_ndc}, +} +#import bevy_render::maths::{affine3_to_square, mat2x4_f32_to_mat3x3_unpack} + +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS +#import bevy_pbr::{ + prepass_bindings::previous_view_proj, + pbr_prepass_functions::calculate_motion_vector, +} +#endif +#endif + +/// Functions to be used by materials for reading from a meshlet visibility buffer texture. + +#ifdef MESHLET_MESH_MATERIAL_PASS +struct PartialDerivatives { + barycentrics: vec3, + ddx: vec3, + ddy: vec3, +} + +// https://github.com/ConfettiFX/The-Forge/blob/2d453f376ef278f66f97cbaf36c0d12e4361e275/Examples_3/Visibility_Buffer/src/Shaders/FSL/visibilityBuffer_shade.frag.fsl#L83-L139 +fn compute_partial_derivatives(vertex_clip_positions: array, 3>, ndc_uv: vec2, screen_size: vec2) -> PartialDerivatives { + var result: PartialDerivatives; + + let inv_w = 1.0 / vec3(vertex_clip_positions[0].w, vertex_clip_positions[1].w, vertex_clip_positions[2].w); + let ndc_0 = vertex_clip_positions[0].xy * inv_w[0]; + let ndc_1 = vertex_clip_positions[1].xy * inv_w[1]; + let ndc_2 = vertex_clip_positions[2].xy * inv_w[2]; + + let inv_det = 1.0 / determinant(mat2x2(ndc_2 - ndc_1, ndc_0 - ndc_1)); + result.ddx = vec3(ndc_1.y - ndc_2.y, ndc_2.y - ndc_0.y, ndc_0.y - ndc_1.y) * inv_det * inv_w; + result.ddy = vec3(ndc_2.x - ndc_1.x, ndc_0.x - ndc_2.x, ndc_1.x - ndc_0.x) * inv_det * inv_w; + + var ddx_sum = dot(result.ddx, vec3(1.0)); + var ddy_sum = dot(result.ddy, vec3(1.0)); + + let delta_v = ndc_uv - ndc_0; + let interp_inv_w = inv_w.x + delta_v.x * ddx_sum + delta_v.y * ddy_sum; + let interp_w = 1.0 / interp_inv_w; + + result.barycentrics = vec3( + interp_w * (delta_v.x * result.ddx.x + delta_v.y * result.ddy.x + inv_w.x), + interp_w * (delta_v.x * result.ddx.y + delta_v.y * result.ddy.y), + interp_w * (delta_v.x * result.ddx.z + delta_v.y * result.ddy.z), + ); + + result.ddx *= 2.0 / screen_size.x; + result.ddy *= 2.0 / screen_size.y; + ddx_sum *= 2.0 / screen_size.x; + ddy_sum *= 2.0 / screen_size.y; + + let interp_ddx_w = 1.0 / (interp_inv_w + ddx_sum); + let interp_ddy_w = 1.0 / (interp_inv_w + ddy_sum); + + result.ddx = interp_ddx_w * (result.barycentrics * interp_inv_w + result.ddx) - result.barycentrics; + result.ddy = interp_ddy_w * (result.barycentrics * interp_inv_w + result.ddy) - result.barycentrics; + return result; +} + +struct VertexOutput { + position: vec4, + world_position: vec4, + world_normal: vec3, + uv: vec2, + ddx_uv: vec2, + ddy_uv: vec2, + world_tangent: vec4, + mesh_flags: u32, + meshlet_id: u32, +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS + motion_vector: vec2, +#endif +#endif +} + +/// Load the visibility buffer texture and resolve it into a VertexOutput. +fn resolve_vertex_output(frag_coord: vec4) -> VertexOutput { + let vbuffer = textureLoad(meshlet_visibility_buffer, vec2(frag_coord.xy), 0).r; + let cluster_id = vbuffer >> 8u; + let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; + let meshlet = meshlets[meshlet_id]; + let triangle_id = extractBits(vbuffer, 0u, 8u); + let index_ids = meshlet.start_index_id + vec3(triangle_id * 3u) + vec3(0u, 1u, 2u); + let indices = meshlet.start_vertex_id + vec3(get_meshlet_index(index_ids.x), get_meshlet_index(index_ids.y), get_meshlet_index(index_ids.z)); + let vertex_ids = vec3(meshlet_vertex_ids[indices.x], meshlet_vertex_ids[indices.y], meshlet_vertex_ids[indices.z]); + let vertex_1 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.x]); + let vertex_2 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.y]); + let vertex_3 = unpack_meshlet_vertex(meshlet_vertex_data[vertex_ids.z]); + + let instance_id = meshlet_thread_instance_ids[cluster_id]; + let instance_uniform = meshlet_instance_uniforms[instance_id]; + let model = affine3_to_square(instance_uniform.model); + + let world_position_1 = mesh_position_local_to_world(model, vec4(vertex_1.position, 1.0)); + let world_position_2 = mesh_position_local_to_world(model, vec4(vertex_2.position, 1.0)); + let world_position_3 = mesh_position_local_to_world(model, vec4(vertex_3.position, 1.0)); + let clip_position_1 = position_world_to_clip(world_position_1.xyz); + let clip_position_2 = position_world_to_clip(world_position_2.xyz); + let clip_position_3 = position_world_to_clip(world_position_3.xyz); + let frag_coord_ndc = frag_coord_to_ndc(frag_coord).xy; + let partial_derivatives = compute_partial_derivatives( + array(clip_position_1, clip_position_2, clip_position_3), + frag_coord_ndc, + view.viewport.zw, + ); + + let world_position = mat3x4(world_position_1, world_position_2, world_position_3) * partial_derivatives.barycentrics; + let vertex_normal = mat3x3(vertex_1.normal, vertex_2.normal, vertex_3.normal) * partial_derivatives.barycentrics; + let world_normal = normalize( + mat2x4_f32_to_mat3x3_unpack( + instance_uniform.inverse_transpose_model_a, + instance_uniform.inverse_transpose_model_b, + ) * vertex_normal + ); + let uv = mat3x2(vertex_1.uv, vertex_2.uv, vertex_3.uv) * partial_derivatives.barycentrics; + let ddx_uv = mat3x2(vertex_1.uv, vertex_2.uv, vertex_3.uv) * partial_derivatives.ddx; + let ddy_uv = mat3x2(vertex_1.uv, vertex_2.uv, vertex_3.uv) * partial_derivatives.ddy; + let vertex_tangent = mat3x4(vertex_1.tangent, vertex_2.tangent, vertex_3.tangent) * partial_derivatives.barycentrics; + let world_tangent = vec4( + normalize( + mat3x3( + model[0].xyz, + model[1].xyz, + model[2].xyz + ) * vertex_tangent.xyz + ), + vertex_tangent.w * (f32(bool(instance_uniform.flags & MESH_FLAGS_SIGN_DETERMINANT_MODEL_3X3_BIT)) * 2.0 - 1.0) + ); + +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS + let previous_model = affine3_to_square(instance_uniform.previous_model); + let previous_world_position_1 = mesh_position_local_to_world(previous_model, vec4(vertex_1.position, 1.0)); + let previous_world_position_2 = mesh_position_local_to_world(previous_model, vec4(vertex_2.position, 1.0)); + let previous_world_position_3 = mesh_position_local_to_world(previous_model, vec4(vertex_3.position, 1.0)); + let previous_clip_position_1 = previous_view_proj * vec4(previous_world_position_1.xyz, 1.0); + let previous_clip_position_2 = previous_view_proj * vec4(previous_world_position_2.xyz, 1.0); + let previous_clip_position_3 = previous_view_proj * vec4(previous_world_position_3.xyz, 1.0); + let previous_partial_derivatives = compute_partial_derivatives( + array(previous_clip_position_1, previous_clip_position_2, previous_clip_position_3), + frag_coord_ndc, + view.viewport.zw, + ); + let previous_world_position = mat3x4(previous_world_position_1, previous_world_position_2, previous_world_position_3) * previous_partial_derivatives.barycentrics; + let motion_vector = calculate_motion_vector(world_position, previous_world_position); +#endif +#endif + + return VertexOutput( + frag_coord, + world_position, + world_normal, + uv, + ddx_uv, + ddy_uv, + world_tangent, + instance_uniform.flags, + meshlet_id, +#ifdef PREPASS_FRAGMENT +#ifdef MOTION_VECTOR_PREPASS + motion_vector, +#endif +#endif + ); +} +#endif diff --git a/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl b/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl new file mode 100644 index 0000000000000..f7ea7dae56aac --- /dev/null +++ b/crates/bevy_pbr/src/meshlet/write_index_buffer.wgsl @@ -0,0 +1,43 @@ +#import bevy_pbr::meshlet_bindings::{ + meshlet_thread_meshlet_ids, + meshlets, + draw_indirect_args, + draw_index_buffer, + get_meshlet_occlusion, + get_meshlet_previous_occlusion, +} + +var draw_index_buffer_start_workgroup: u32; + +/// This pass writes out a buffer of cluster + triangle IDs for the draw_indirect() call to rasterize each visible meshlet. + +@compute +@workgroup_size(64, 1, 1) // 64 threads per workgroup, 1 workgroup per cluster, 1 thread per triangle +fn write_index_buffer(@builtin(workgroup_id) workgroup_id: vec3, @builtin(num_workgroups) num_workgroups: vec3, @builtin(local_invocation_index) triangle_id: u32) { + // Calculate the cluster ID for this workgroup + let cluster_id = dot(workgroup_id, vec3(num_workgroups.x * num_workgroups.x, num_workgroups.x, 1u)); + if cluster_id >= arrayLength(&meshlet_thread_meshlet_ids) { return; } + + // If the meshlet was culled, then we don't need to draw it + if !get_meshlet_occlusion(cluster_id) { return; } + + // If the meshlet was drawn in the first pass, and this is the second pass, then we don't need to draw it +#ifdef MESHLET_SECOND_WRITE_INDEX_BUFFER_PASS + if get_meshlet_previous_occlusion(cluster_id) { return; } +#endif + + let meshlet_id = meshlet_thread_meshlet_ids[cluster_id]; + let meshlet = meshlets[meshlet_id]; + + // Reserve space in the buffer for this meshlet's triangles, and broadcast the start of that slice to all threads + if triangle_id == 0u { + draw_index_buffer_start_workgroup = atomicAdd(&draw_indirect_args.vertex_count, meshlet.triangle_count * 3u); + draw_index_buffer_start_workgroup /= 3u; + } + workgroupBarrier(); + + // Each thread writes one triangle of the meshlet to the buffer slice reserved for the meshlet + if triangle_id < meshlet.triangle_count { + draw_index_buffer[draw_index_buffer_start_workgroup + triangle_id] = (cluster_id << 8u) | triangle_id; + } +} diff --git a/crates/bevy_pbr/src/pbr_material.rs b/crates/bevy_pbr/src/pbr_material.rs index 3231cb8df5d6d..64c7b661f9abd 100644 --- a/crates/bevy_pbr/src/pbr_material.rs +++ b/crates/bevy_pbr/src/pbr_material.rs @@ -823,6 +823,21 @@ impl Material for StandardMaterial { PBR_SHADER_HANDLE.into() } + #[cfg(feature = "meshlet")] + fn meshlet_mesh_fragment_shader() -> ShaderRef { + Self::fragment_shader() + } + + #[cfg(feature = "meshlet")] + fn meshlet_mesh_prepass_fragment_shader() -> ShaderRef { + Self::prepass_fragment_shader() + } + + #[cfg(feature = "meshlet")] + fn meshlet_mesh_deferred_fragment_shader() -> ShaderRef { + Self::deferred_fragment_shader() + } + fn specialize( _pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index f8d9ecc18808c..6e78c8f4c8c5e 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -29,6 +29,10 @@ use bevy_render::{ use bevy_transform::prelude::GlobalTransform; use bevy_utils::tracing::error; +#[cfg(feature = "meshlet")] +use crate::meshlet::{ + prepare_material_meshlet_meshes_prepass, queue_material_meshlet_meshes, MeshletGpuScene, +}; use crate::*; use std::{hash::Hash, marker::PhantomData}; @@ -172,6 +176,15 @@ where // queue_material_meshes only writes to `material_bind_group_id`, which `queue_prepass_material_meshes` doesn't read .ambiguous_with(queue_material_meshes::), ); + + #[cfg(feature = "meshlet")] + render_app.add_systems( + Render, + prepare_material_meshlet_meshes_prepass:: + .in_set(RenderSet::Queue) + .before(queue_material_meshlet_meshes::) + .run_if(resource_exists::), + ); } } diff --git a/crates/bevy_pbr/src/render/pbr.wgsl b/crates/bevy_pbr/src/render/pbr.wgsl index def70dd89b3ed..7421f1e381353 100644 --- a/crates/bevy_pbr/src/render/pbr.wgsl +++ b/crates/bevy_pbr/src/render/pbr.wgsl @@ -16,11 +16,24 @@ } #endif +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::resolve_vertex_output +#endif + @fragment fn fragment( +#ifdef MESHLET_MESH_MATERIAL_PASS + @builtin(position) frag_coord: vec4, +#else in: VertexOutput, @builtin(front_facing) is_front: bool, +#endif ) -> FragmentOutput { +#ifdef MESHLET_MESH_MATERIAL_PASS + let in = resolve_vertex_output(frag_coord); + let is_front = true; +#endif + // generate a PbrInput struct from the StandardMaterial bindings var pbr_input = pbr_input_from_standard_material(in, is_front); diff --git a/crates/bevy_pbr/src/render/pbr_fragment.wgsl b/crates/bevy_pbr/src/render/pbr_fragment.wgsl index 07b37c3b46962..c3b3e949888dd 100644 --- a/crates/bevy_pbr/src/render/pbr_fragment.wgsl +++ b/crates/bevy_pbr/src/render/pbr_fragment.wgsl @@ -17,7 +17,9 @@ #import bevy_pbr::gtao_utils::gtao_multibounce #endif -#ifdef PREPASS_PIPELINE +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::VertexOutput +#else ifdef PREPASS_PIPELINE #import bevy_pbr::prepass_io::VertexOutput #else #import bevy_pbr::forward_io::VertexOutput @@ -31,7 +33,12 @@ fn pbr_input_from_vertex_output( ) -> pbr_types::PbrInput { var pbr_input: pbr_types::PbrInput = pbr_types::pbr_input_new(); +#ifdef MESHLET_MESH_MATERIAL_PASS + pbr_input.flags = in.mesh_flags; +#else pbr_input.flags = mesh[in.instance_index].flags; +#endif + pbr_input.is_orthographic = view.projection[3].w == 1.0; pbr_input.V = pbr_functions::calculate_view(in.world_position, pbr_input.is_orthographic); pbr_input.frag_coord = in.position; @@ -98,7 +105,11 @@ fn pbr_input_from_standard_material( #endif // VERTEX_TANGENTS if ((pbr_bindings::material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_BASE_COLOR_TEXTURE_BIT) != 0u) { +#ifdef MESHLET_MESH_MATERIAL_PASS + pbr_input.material.base_color *= textureSampleGrad(pbr_bindings::base_color_texture, pbr_bindings::base_color_sampler, uv, in.ddx_uv, in.ddy_uv); +#else pbr_input.material.base_color *= textureSampleBias(pbr_bindings::base_color_texture, pbr_bindings::base_color_sampler, uv, view.mip_bias); +#endif } #endif // VERTEX_UVS @@ -117,7 +128,11 @@ fn pbr_input_from_standard_material( var emissive: vec4 = pbr_bindings::material.emissive; #ifdef VERTEX_UVS if ((pbr_bindings::material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_EMISSIVE_TEXTURE_BIT) != 0u) { +#ifdef MESHLET_MESH_MATERIAL_PASS + emissive = vec4(emissive.rgb * textureSampleGrad(pbr_bindings::emissive_texture, pbr_bindings::emissive_sampler, uv, in.ddx_uv, in.ddy_uv).rgb, 1.0); +#else emissive = vec4(emissive.rgb * textureSampleBias(pbr_bindings::emissive_texture, pbr_bindings::emissive_sampler, uv, view.mip_bias).rgb, 1.0); +#endif } #endif pbr_input.material.emissive = emissive; @@ -128,7 +143,11 @@ fn pbr_input_from_standard_material( let roughness = lighting::perceptualRoughnessToRoughness(perceptual_roughness); #ifdef VERTEX_UVS if ((pbr_bindings::material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_METALLIC_ROUGHNESS_TEXTURE_BIT) != 0u) { +#ifdef MESHLET_MESH_MATERIAL_PASS + let metallic_roughness = textureSampleGrad(pbr_bindings::metallic_roughness_texture, pbr_bindings::metallic_roughness_sampler, uv, in.ddx_uv, in.ddy_uv); +#else let metallic_roughness = textureSampleBias(pbr_bindings::metallic_roughness_texture, pbr_bindings::metallic_roughness_sampler, uv, view.mip_bias); +#endif // Sampling from GLTF standard channels for now metallic *= metallic_roughness.b; perceptual_roughness *= metallic_roughness.g; @@ -140,7 +159,11 @@ fn pbr_input_from_standard_material( var specular_transmission: f32 = pbr_bindings::material.specular_transmission; #ifdef PBR_TRANSMISSION_TEXTURES_SUPPORTED if ((pbr_bindings::material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_SPECULAR_TRANSMISSION_TEXTURE_BIT) != 0u) { - specular_transmission *= textureSample(pbr_bindings::specular_transmission_texture, pbr_bindings::specular_transmission_sampler, uv).r; +#ifdef MESHLET_MESH_MATERIAL_PASS + specular_transmission *= textureSampleGrad(pbr_bindings::specular_transmission_texture, pbr_bindings::specular_transmission_sampler, uv, in.ddx_uv, in.ddy_uv).r; +#else + specular_transmission *= textureSampleBias(pbr_bindings::specular_transmission_texture, pbr_bindings::specular_transmission_sampler, uv, view.mip_bias).r; +#endif } #endif pbr_input.material.specular_transmission = specular_transmission; @@ -148,19 +171,30 @@ fn pbr_input_from_standard_material( var thickness: f32 = pbr_bindings::material.thickness; #ifdef PBR_TRANSMISSION_TEXTURES_SUPPORTED if ((pbr_bindings::material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_THICKNESS_TEXTURE_BIT) != 0u) { - thickness *= textureSample(pbr_bindings::thickness_texture, pbr_bindings::thickness_sampler, uv).g; +#ifdef MESHLET_MESH_MATERIAL_PASS + thickness *= textureSampleGrad(pbr_bindings::thickness_texture, pbr_bindings::thickness_sampler, uv, in.ddx_uv, in.ddy_uv).g; +#else + thickness *= textureSampleBias(pbr_bindings::thickness_texture, pbr_bindings::thickness_sampler, uv, view.mip_bias).g; +#endif } #endif // scale thickness, accounting for non-uniform scaling (e.g. a “squished” mesh) + // TODO: Meshlet support +#ifndef MESHLET_MESH_MATERIAL_PASS thickness *= length( (transpose(mesh[in.instance_index].model) * vec4(pbr_input.N, 0.0)).xyz ); +#endif pbr_input.material.thickness = thickness; var diffuse_transmission = pbr_bindings::material.diffuse_transmission; #ifdef PBR_TRANSMISSION_TEXTURES_SUPPORTED if ((pbr_bindings::material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_DIFFUSE_TRANSMISSION_TEXTURE_BIT) != 0u) { - diffuse_transmission *= textureSample(pbr_bindings::diffuse_transmission_texture, pbr_bindings::diffuse_transmission_sampler, uv).a; +#ifdef MESHLET_MESH_MATERIAL_PASS + diffuse_transmission *= textureSampleGrad(pbr_bindings::diffuse_transmission_texture, pbr_bindings::diffuse_transmission_sampler, uv, in.ddx_uv, in.ddy_uv).a; +#else + diffuse_transmission *= textureSampleBias(pbr_bindings::diffuse_transmission_texture, pbr_bindings::diffuse_transmission_sampler, uv, view.mip_bias).a; +#endif } #endif pbr_input.material.diffuse_transmission = diffuse_transmission; @@ -169,7 +203,11 @@ fn pbr_input_from_standard_material( var specular_occlusion: f32 = 1.0; #ifdef VERTEX_UVS if ((pbr_bindings::material.flags & pbr_types::STANDARD_MATERIAL_FLAGS_OCCLUSION_TEXTURE_BIT) != 0u) { +#ifdef MESHLET_MESH_MATERIAL_PASS + diffuse_occlusion = vec3(textureSampleGrad(pbr_bindings::occlusion_texture, pbr_bindings::occlusion_sampler, uv, in.ddx_uv, in.ddy_uv).r); +#else diffuse_occlusion = vec3(textureSampleBias(pbr_bindings::occlusion_texture, pbr_bindings::occlusion_sampler, uv, view.mip_bias).r); +#endif } #endif #ifdef SCREEN_SPACE_AMBIENT_OCCLUSION @@ -199,9 +237,14 @@ fn pbr_input_from_standard_material( uv, #endif view.mip_bias, +#ifdef MESHLET_MESH_MATERIAL_PASS + in.ddx_uv, + in.ddy_uv, +#endif ); #endif +// TODO: Meshlet support #ifdef LIGHTMAP pbr_input.lightmap_light = lightmap( in.uv_b, diff --git a/crates/bevy_pbr/src/render/pbr_functions.wgsl b/crates/bevy_pbr/src/render/pbr_functions.wgsl index 24090aab329a5..1c602944662c5 100644 --- a/crates/bevy_pbr/src/render/pbr_functions.wgsl +++ b/crates/bevy_pbr/src/render/pbr_functions.wgsl @@ -74,6 +74,10 @@ fn apply_normal_mapping( uv: vec2, #endif mip_bias: f32, +#ifdef MESHLET_MESH_MATERIAL_PASS + ddx_uv: vec2, + ddy_uv: vec2, +#endif ) -> vec3 { // NOTE: The mikktspace method of normal mapping explicitly requires that the world normal NOT // be re-normalized in the fragment shader. This is primarily to match the way mikktspace @@ -98,7 +102,11 @@ fn apply_normal_mapping( #ifdef VERTEX_UVS #ifdef STANDARD_MATERIAL_NORMAL_MAP // Nt is the tangent-space normal. +#ifdef MESHLET_MESH_MATERIAL_PASS + var Nt = textureSampleGrad(pbr_bindings::normal_map_texture, pbr_bindings::normal_map_sampler, uv, ddx_uv, ddy_uv).rgb; +#else var Nt = textureSampleBias(pbr_bindings::normal_map_texture, pbr_bindings::normal_map_sampler, uv, mip_bias).rgb; +#endif if (standard_material_flags & pbr_types::STANDARD_MATERIAL_FLAGS_TWO_COMPONENT_NORMAL_MAP) != 0u { // Only use the xy components and derive z for 2-component normal maps. Nt = vec3(Nt.rg * 2.0 - 1.0, 0.0); diff --git a/crates/bevy_pbr/src/render/pbr_prepass.wgsl b/crates/bevy_pbr/src/render/pbr_prepass.wgsl index 8be86b5af2175..c77d71ebca16d 100644 --- a/crates/bevy_pbr/src/render/pbr_prepass.wgsl +++ b/crates/bevy_pbr/src/render/pbr_prepass.wgsl @@ -6,14 +6,27 @@ prepass_io, mesh_view_bindings::view, } - + +#ifdef MESHLET_MESH_MATERIAL_PASS +#import bevy_pbr::meshlet_visibility_buffer_resolve::resolve_vertex_output +#endif + #ifdef PREPASS_FRAGMENT @fragment fn fragment( +#ifdef MESHLET_MESH_MATERIAL_PASS + @builtin(position) frag_coord: vec4, +#else in: prepass_io::VertexOutput, @builtin(front_facing) is_front: bool, +#endif ) -> prepass_io::FragmentOutput { +#ifdef MESHLET_MESH_MATERIAL_PASS + let in = resolve_vertex_output(frag_coord); + let is_front = true; +#else pbr_prepass_functions::prepass_alpha_discard(in); +#endif var out: prepass_io::FragmentOutput; @@ -46,6 +59,10 @@ fn fragment( in.uv, #endif // VERTEX_UVS view.mip_bias, +#ifdef MESHLET_MESH_MATERIAL_PASS + in.ddx_uv, + in.ddy_uv, +#endif // MESHLET_MESH_MATERIAL_PASS ); out.normal = vec4(normal * 0.5 + vec3(0.5), 1.0); @@ -55,7 +72,11 @@ fn fragment( #endif // NORMAL_PREPASS #ifdef MOTION_VECTOR_PREPASS +#ifdef MESHLET_MESH_MATERIAL_PASS + out.motion_vector = in.motion_vector; +#else out.motion_vector = pbr_prepass_functions::calculate_motion_vector(in.world_position, in.previous_world_position); +#endif #endif return out; diff --git a/crates/bevy_render/src/mesh/mesh/mod.rs b/crates/bevy_render/src/mesh/mesh/mod.rs index dc4549cd040b5..e65dca3b984f8 100644 --- a/crates/bevy_render/src/mesh/mesh/mod.rs +++ b/crates/bevy_render/src/mesh/mesh/mod.rs @@ -369,6 +369,14 @@ impl Mesh { self } + /// Returns the size of a vertex in bytes. + pub fn get_vertex_size(&self) -> u64 { + self.attributes + .values() + .map(|data| data.attribute.format.get_size()) + .sum() + } + /// Computes and returns the index data of the mesh as bytes. /// This is used to transform the index data into a GPU friendly format. pub fn get_index_buffer_bytes(&self) -> Option<&[u8]> { diff --git a/crates/bevy_render/src/mesh/mod.rs b/crates/bevy_render/src/mesh/mod.rs index 7748a5a1eaeb5..2cb30bb2dc5ad 100644 --- a/crates/bevy_render/src/mesh/mod.rs +++ b/crates/bevy_render/src/mesh/mod.rs @@ -57,7 +57,7 @@ impl MeshVertexBufferLayouts { /// Inserts a new mesh vertex buffer layout in the store and returns a /// reference to it, reusing the existing reference if this mesh vertex /// buffer layout was already in the store. - pub(crate) fn insert(&mut self, layout: MeshVertexBufferLayout) -> MeshVertexBufferLayoutRef { + pub fn insert(&mut self, layout: MeshVertexBufferLayout) -> MeshVertexBufferLayoutRef { // Because the special `PartialEq` and `Hash` implementations that // compare by pointer are on `MeshVertexBufferLayoutRef`, not on // `Arc`, this compares the mesh vertex buffer diff --git a/docs/cargo_features.md b/docs/cargo_features.md index c5bf29bd20c84..ff4d5313efc3c 100644 --- a/docs/cargo_features.md +++ b/docs/cargo_features.md @@ -64,6 +64,8 @@ The default feature set enables most of the expected features of a game engine, |glam_assert|Enable assertions to check the validity of parameters passed to glam| |ios_simulator|Enable support for the ios_simulator by downgrading some rendering capabilities| |jpeg|JPEG image format support| +|meshlet|Enables the meshlet renderer for dense high-poly scenes (experimental)| +|meshlet_processor|Enables processing meshes into meshlet meshes for bevy_pbr| |minimp3|MP3 audio format support (through minimp3)| |mp3|MP3 audio format support| |pbr_transmission_textures|Enable support for transmission-related textures in the `StandardMaterial`, at the risk of blowing past the global, per-shader texture limit on older/lower-end GPUs| diff --git a/examples/3d/meshlet.rs b/examples/3d/meshlet.rs new file mode 100644 index 0000000000000..3a94efca6a276 --- /dev/null +++ b/examples/3d/meshlet.rs @@ -0,0 +1,180 @@ +//! Meshlet rendering for dense high-poly scenes (experimental). + +#[path = "../helpers/camera_controller.rs"] +mod camera_controller; + +use bevy::{ + pbr::{ + experimental::meshlet::{MaterialMeshletMeshBundle, MeshletMesh, MeshletPlugin}, + CascadeShadowConfigBuilder, DirectionalLightShadowMap, + }, + prelude::*, + render::render_resource::AsBindGroup, +}; +use camera_controller::{CameraController, CameraControllerPlugin}; +use std::f32::consts::PI; + +// Note: This example showcases the meshlet API, but is not the type of scene that would benefit from using meshlets. + +fn main() { + App::new() + .insert_resource(DirectionalLightShadowMap { size: 4096 }) + .add_plugins(( + DefaultPlugins, + MeshletPlugin, + MaterialPlugin::::default(), + CameraControllerPlugin, + )) + .add_systems(Startup, setup) + .add_systems(Update, draw_bounding_spheres) + .run(); +} + +fn setup( + mut commands: Commands, + asset_server: Res, + mut standard_materials: ResMut>, + mut debug_materials: ResMut>, + mut meshes: ResMut>, +) { + info!("\nMeshlet Controls:\n Space - Toggle bounding spheres"); + + commands.spawn(( + Camera3dBundle { + transform: Transform::from_translation(Vec3::new(1.8, 0.4, -0.1)) + .looking_at(Vec3::ZERO, Vec3::Y), + ..default() + }, + EnvironmentMapLight { + diffuse_map: asset_server.load("environment_maps/pisa_diffuse_rgb9e5_zstd.ktx2"), + specular_map: asset_server.load("environment_maps/pisa_specular_rgb9e5_zstd.ktx2"), + intensity: 150.0, + }, + CameraController::default(), + )); + + commands.spawn(DirectionalLightBundle { + directional_light: DirectionalLight { + illuminance: light_consts::lux::FULL_DAYLIGHT, + shadows_enabled: true, + ..default() + }, + cascade_shadow_config: CascadeShadowConfigBuilder { + num_cascades: 1, + maximum_distance: 5.0, + ..default() + } + .build(), + transform: Transform::from_rotation(Quat::from_euler( + EulerRot::ZYX, + 0.0, + PI * -0.15, + PI * -0.15, + )), + ..default() + }); + + // A custom file format storing a [`bevy_render::mesh::Mesh`] + // that has been converted to a [`bevy_pbr::meshlet::MeshletMesh`] + // using [`bevy_pbr::meshlet::MeshletMesh::from_mesh`], which is + // a function only available when the `meshlet_processor` cargo feature is enabled. + let meshlet_mesh_handle = asset_server.load("models/bunny.meshlet_mesh"); + let debug_material = debug_materials.add(MeshletDebugMaterial::default()); + + for x in -2..=2 { + commands.spawn(MaterialMeshletMeshBundle { + meshlet_mesh: meshlet_mesh_handle.clone(), + material: standard_materials.add(StandardMaterial { + base_color: match x { + -2 => Srgba::hex("#dc2626").unwrap().into(), + -1 => Srgba::hex("#ea580c").unwrap().into(), + 0 => Srgba::hex("#facc15").unwrap().into(), + 1 => Srgba::hex("#16a34a").unwrap().into(), + 2 => Srgba::hex("#0284c7").unwrap().into(), + _ => unreachable!(), + }, + perceptual_roughness: (x + 2) as f32 / 4.0, + ..default() + }), + transform: Transform::default() + .with_scale(Vec3::splat(0.2)) + .with_translation(Vec3::new(x as f32 / 2.0, 0.0, -0.3)), + ..default() + }); + } + for x in -2..=2 { + commands.spawn(MaterialMeshletMeshBundle { + meshlet_mesh: meshlet_mesh_handle.clone(), + material: debug_material.clone(), + transform: Transform::default() + .with_scale(Vec3::splat(0.2)) + .with_rotation(Quat::from_rotation_y(PI)) + .with_translation(Vec3::new(x as f32 / 2.0, 0.0, 0.3)), + ..default() + }); + } + + commands.spawn(PbrBundle { + mesh: meshes.add(Plane3d::default().mesh().size(5.0, 5.0)), + material: standard_materials.add(StandardMaterial { + base_color: Color::WHITE, + perceptual_roughness: 1.0, + ..default() + }), + ..default() + }); +} + +#[allow(clippy::too_many_arguments)] +fn draw_bounding_spheres( + query: Query<(&Handle, &Transform), With>>, + debug: Query<&MeshletBoundingSpheresDebug>, + camera: Query<&Transform, With>, + mut commands: Commands, + meshlets: Res>, + mut gizmos: Gizmos, + keys: Res>, + mut should_draw: Local, +) { + if keys.just_pressed(KeyCode::Space) { + *should_draw = !*should_draw; + } + + match debug.get_single() { + Ok(meshlet_debug) if *should_draw => { + let camera_pos = camera.single().translation; + for circle in &meshlet_debug.circles { + gizmos.circle( + circle.0, + Dir3::new(camera_pos - circle.0).unwrap(), + circle.1, + Color::BLACK, + ); + } + } + Err(_) => { + if let Some((handle, transform)) = query.iter().last() { + if let Some(meshlets) = meshlets.get(handle) { + let mut circles = Vec::new(); + for bounding_sphere in meshlets.meshlet_bounding_spheres.iter() { + let center = transform.transform_point(bounding_sphere.center); + circles.push((center, transform.scale.x * bounding_sphere.radius)); + } + commands.spawn(MeshletBoundingSpheresDebug { circles }); + } + } + } + _ => {} + } +} + +#[derive(Component)] +struct MeshletBoundingSpheresDebug { + circles: Vec<(Vec3, f32)>, +} + +#[derive(Asset, TypePath, AsBindGroup, Clone, Default)] +struct MeshletDebugMaterial { + _dummy: (), +} +impl Material for MeshletDebugMaterial {} diff --git a/examples/README.md b/examples/README.md index a336ede7f4af3..6ca72df0b3d78 100644 --- a/examples/README.md +++ b/examples/README.md @@ -136,6 +136,7 @@ Example | Description [Lightmaps](../examples/3d/lightmaps.rs) | Rendering a scene with baked lightmaps [Lines](../examples/3d/lines.rs) | Create a custom material to draw 3d lines [Load glTF](../examples/3d/load_gltf.rs) | Loads and renders a glTF file as a scene +[Meshlet](../examples/3d/meshlet.rs) | Meshlet rendering for dense high-poly scenes (experimental) [Orthographic View](../examples/3d/orthographic.rs) | Shows how to create a 3D orthographic view (for isometric-look in games or CAD applications) [Parallax Mapping](../examples/3d/parallax_mapping.rs) | Demonstrates use of a normal map and depth map for parallax mapping [Parenting](../examples/3d/parenting.rs) | Demonstrates parent->child relationships and relative transformations