From ae14ac53b42252f22dd857b854eb16842e159982 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Thu, 7 Apr 2022 16:16:35 +0000 Subject: [PATCH] Use storage buffers for clustered forward point lights (#3989) # Objective - Make use of storage buffers, where they are available, for clustered forward bindings to support far more point lights in a scene - Fixes #3605 - Based on top of #4079 This branch on an M1 Max can keep 60fps with about 2150 point lights of radius 1m in the Sponza scene where I've been testing. The bottleneck is mostly assigning lights to clusters, which grows faster than linearly (I think 1000 lights was about 1.5ms and 5000 was 7.5ms). I have seen papers and presentations leveraging compute shaders that can get this up to over 1 million. That said, I think any further optimisations should probably be done in a separate PR. ## Solution - Add `RenderDevice` to the `Material` and `SpecializedMaterial` trait `::key()` functions to allow setting flags on the keys depending on feature/limit availability - Make `GpuPointLights` and `ViewClusterBuffers` into enums containing `UniformVec` and `StorageBuffer` variants. Implement the necessary API on them so that usage is the same for both cases and the only difference is at initialisation time. - Add appropriate shader defs in the shader code to handle the two cases ## Context on some decisions / open questions - I'm using `max_storage_buffers_per_shader_stage >= 3` as a check to see if storage buffers are supported. I was thinking about diving into 'binding resource management' but it feels like we don't have enough use cases to understand the problem yet, and it is mostly a separate concern to this PR, so I think it should be handled separately. - Should `ViewClusterBuffers` and `ViewClusterBindings` be merged, duplicating the count variables into the enum variants?
Co-authored-by: Carter Anderson --- Cargo.toml | 5 +- crates/bevy_pbr/src/lib.rs | 10 +- crates/bevy_pbr/src/light.rs | 25 +- crates/bevy_pbr/src/material.rs | 11 +- crates/bevy_pbr/src/pbr_material.rs | 1 + crates/bevy_pbr/src/render/light.rs | 366 ++++++++++++++---- crates/bevy_pbr/src/render/mesh.rs | 60 ++- .../src/render/mesh_view_bind_group.wgsl | 24 +- crates/bevy_pbr/src/render/pbr.wgsl | 33 +- .../src/render_resource/storage_buffer.rs | 24 +- .../bevy_render/src/renderer/render_device.rs | 13 +- crates/bevy_sprite/src/mesh2d/material.rs | 15 +- examples/README.md | 1 + examples/shader/custom_vertex_attribute.rs | 1 + examples/shader/shader_material_glsl.rs | 1 + examples/stress_tests/many_lights.rs | 166 ++++++++ 16 files changed, 623 insertions(+), 133 deletions(-) create mode 100644 examples/stress_tests/many_lights.rs diff --git a/Cargo.toml b/Cargo.toml index dc3377ac8d155..40ef75724ce6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -612,7 +612,10 @@ min_sdk_version = 16 target_sdk_version = 29 # Stress Tests +[[example]] +name = "many_lights" +path = "examples/stress_tests/many_lights.rs" + [[example]] name = "transform_hierarchy" path = "examples/stress_tests/transform_hierarchy.rs" - diff --git a/crates/bevy_pbr/src/lib.rs b/crates/bevy_pbr/src/lib.rs index 20d0aa2f5a979..3659a88c088ac 100644 --- a/crates/bevy_pbr/src/lib.rs +++ b/crates/bevy_pbr/src/lib.rs @@ -150,12 +150,10 @@ impl Plugin for PbrPlugin { ) .add_system_to_stage( RenderStage::Prepare, - // this is added as an exclusive system because it contributes new views. it must run (and have Commands applied) - // _before_ the `prepare_views()` system is run. ideally this becomes a normal system when "stageless" features come out - render::prepare_clusters - .exclusive_system() - .label(RenderLightSystems::PrepareClusters) - .after(RenderLightSystems::PrepareLights), + // NOTE: This needs to run after prepare_lights. 
As prepare_lights is an exclusive system, + // just adding it to the non-exclusive systems in the Prepare stage means it runs after + // prepare_lights. + render::prepare_clusters.label(RenderLightSystems::PrepareClusters), ) .add_system_to_stage( RenderStage::Queue, diff --git a/crates/bevy_pbr/src/light.rs b/crates/bevy_pbr/src/light.rs index 1258c944de24a..90eda98082ae0 100644 --- a/crates/bevy_pbr/src/light.rs +++ b/crates/bevy_pbr/src/light.rs @@ -9,6 +9,8 @@ use bevy_render::{ color::Color, prelude::Image, primitives::{Aabb, CubemapFrusta, Frustum, Sphere}, + render_resource::BufferBindingType, + renderer::RenderDevice, view::{ComputedVisibility, RenderLayers, Visibility, VisibleEntities}, }; use bevy_transform::components::GlobalTransform; @@ -17,7 +19,8 @@ use bevy_window::Windows; use crate::{ calculate_cluster_factors, CubeMapFace, CubemapVisibleEntities, ViewClusterBindings, - CUBE_MAP_FACES, MAX_POINT_LIGHTS, POINT_LIGHT_NEAR_Z, + CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, CUBE_MAP_FACES, MAX_UNIFORM_BUFFER_POINT_LIGHTS, + POINT_LIGHT_NEAR_Z, }; /// A light that emits light in all directions from a central point. @@ -709,6 +712,7 @@ pub(crate) fn assign_lights_to_clusters( lights_query: Query<(Entity, &GlobalTransform, &PointLight, &Visibility)>, mut lights: Local>, mut max_point_lights_warning_emitted: Local, + render_device: Res, ) { global_lights.entities.clear(); lights.clear(); @@ -727,7 +731,13 @@ pub(crate) fn assign_lights_to_clusters( ), ); - if lights.len() > MAX_POINT_LIGHTS { + let clustered_forward_buffer_binding_type = + render_device.get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT); + let supports_storage_buffers = matches!( + clustered_forward_buffer_binding_type, + BufferBindingType::Storage { .. 
} + ); + if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !supports_storage_buffers { lights.sort_by(|light_1, light_2| { point_light_order( (&light_1.entity, &light_1.shadows_enabled), @@ -743,7 +753,7 @@ pub(crate) fn assign_lights_to_clusters( let mut lights_in_view_count = 0; lights.retain(|light| { // take one extra light to check if we should emit the warning - if lights_in_view_count == MAX_POINT_LIGHTS + 1 { + if lights_in_view_count == MAX_UNIFORM_BUFFER_POINT_LIGHTS + 1 { false } else { let light_sphere = Sphere { @@ -763,12 +773,15 @@ pub(crate) fn assign_lights_to_clusters( } }); - if lights.len() > MAX_POINT_LIGHTS && !*max_point_lights_warning_emitted { - warn!("MAX_POINT_LIGHTS ({}) exceeded", MAX_POINT_LIGHTS); + if lights.len() > MAX_UNIFORM_BUFFER_POINT_LIGHTS && !*max_point_lights_warning_emitted { + warn!( + "MAX_UNIFORM_BUFFER_POINT_LIGHTS ({}) exceeded", + MAX_UNIFORM_BUFFER_POINT_LIGHTS + ); *max_point_lights_warning_emitted = true; } - lights.truncate(MAX_POINT_LIGHTS); + lights.truncate(MAX_UNIFORM_BUFFER_POINT_LIGHTS); } for (view_entity, camera_transform, camera, frustum, config, clusters, mut visible_lights) in diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index 5e0648adcd43a..33cb2eb54d53c 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -39,7 +39,7 @@ use std::marker::PhantomData; /// way to render [`Mesh`] entities with custom shader logic. For materials that can specialize their [`RenderPipelineDescriptor`] /// based on specific material values, see [`SpecializedMaterial`]. [`Material`] automatically implements [`SpecializedMaterial`] /// and can be used anywhere that type is used (such as [`MaterialPlugin`]). -pub trait Material: Asset + RenderAsset { +pub trait Material: Asset + RenderAsset + Sized { /// Returns this material's [`BindGroup`]. This should match the layout returned by [`Material::bind_group_layout`]. 
fn bind_group(material: &::PreparedAsset) -> &BindGroup; @@ -78,6 +78,7 @@ pub trait Material: Asset + RenderAsset { #[allow(unused_variables)] #[inline] fn specialize( + pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, layout: &MeshVertexBufferLayout, ) -> Result<(), SpecializedMeshPipelineError> { @@ -93,11 +94,12 @@ impl SpecializedMaterial for M { #[inline] fn specialize( + pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, _key: Self::Key, layout: &MeshVertexBufferLayout, ) -> Result<(), SpecializedMeshPipelineError> { - ::specialize(descriptor, layout) + ::specialize(pipeline, descriptor, layout) } #[inline] @@ -137,7 +139,7 @@ impl SpecializedMaterial for M { /// way to render [`Mesh`] entities with custom shader logic. [`SpecializedMaterials`](SpecializedMaterial) use their [`SpecializedMaterial::Key`] /// to customize their [`RenderPipelineDescriptor`] based on specific material values. The slightly simpler [`Material`] trait /// should be used for materials that do not need specialization. [`Material`] types automatically implement [`SpecializedMaterial`]. -pub trait SpecializedMaterial: Asset + RenderAsset { +pub trait SpecializedMaterial: Asset + RenderAsset + Sized { /// The key used to specialize this material's [`RenderPipelineDescriptor`]. type Key: PartialEq + Eq + Hash + Clone + Send + Sync; @@ -148,6 +150,7 @@ pub trait SpecializedMaterial: Asset + RenderAsset { /// Specializes the given `descriptor` according to the given `key`. 
fn specialize( + pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, key: Self::Key, layout: &MeshVertexBufferLayout, @@ -251,7 +254,7 @@ impl SpecializedMeshPipeline for MaterialPipeline { let descriptor_layout = descriptor.layout.as_mut().unwrap(); descriptor_layout.insert(1, self.material_layout.clone()); - M::specialize(&mut descriptor, key.material_key, layout)?; + M::specialize(self, &mut descriptor, key.material_key, layout)?; Ok(descriptor) } } diff --git a/crates/bevy_pbr/src/pbr_material.rs b/crates/bevy_pbr/src/pbr_material.rs index a63950042757c..65362c5b14d13 100644 --- a/crates/bevy_pbr/src/pbr_material.rs +++ b/crates/bevy_pbr/src/pbr_material.rs @@ -378,6 +378,7 @@ impl SpecializedMaterial for StandardMaterial { } fn specialize( + _pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, key: Self::Key, _layout: &MeshVertexBufferLayout, diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index fbffe9cd021cd..aa23d63568e3e 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -10,7 +10,7 @@ use bevy_ecs::{ prelude::*, system::{lifetimeless::*, SystemParamItem}, }; -use bevy_math::{const_vec3, Mat4, UVec3, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles}; +use bevy_math::{const_vec3, Mat4, UVec2, UVec3, UVec4, Vec2, Vec3, Vec4, Vec4Swizzles}; use bevy_render::{ camera::{Camera, CameraProjection}, color::Color, @@ -22,7 +22,7 @@ use bevy_render::{ EntityRenderCommand, PhaseItem, RenderCommandResult, RenderPhase, SetItemPipeline, TrackedRenderPass, }, - render_resource::{std140::AsStd140, *}, + render_resource::{std140::AsStd140, std430::AsStd430, *}, renderer::{RenderContext, RenderDevice, RenderQueue}, texture::*, view::{ @@ -81,7 +81,7 @@ pub struct ExtractedDirectionalLight { pub type ExtractedDirectionalLightShadowMap = DirectionalLightShadowMap; #[repr(C)] -#[derive(Copy, Clone, AsStd140, Default, Debug)] +#[derive(Copy, Clone, AsStd140, AsStd430, 
Default, Debug)] pub struct GpuPointLight { // The lower-right 2x2 values of the projection matrix 22 23 32 33 projection_lr: Vec4, @@ -92,9 +92,84 @@ pub struct GpuPointLight { shadow_normal_bias: f32, } -#[derive(AsStd140)] -pub struct GpuPointLights { - data: [GpuPointLight; MAX_POINT_LIGHTS], +pub enum GpuPointLights { + Uniform { + buffer: UniformVec<[GpuPointLight; MAX_UNIFORM_BUFFER_POINT_LIGHTS]>, + }, + Storage { + buffer: StorageBuffer, + }, +} + +impl GpuPointLights { + fn new(buffer_binding_type: BufferBindingType) -> Self { + match buffer_binding_type { + BufferBindingType::Storage { .. } => Self::storage(), + BufferBindingType::Uniform => Self::uniform(), + } + } + + fn uniform() -> Self { + Self::Uniform { + buffer: UniformVec::default(), + } + } + + fn storage() -> Self { + Self::Storage { + buffer: StorageBuffer::default(), + } + } + + fn clear(&mut self) { + match self { + GpuPointLights::Uniform { buffer } => buffer.clear(), + GpuPointLights::Storage { buffer } => buffer.clear(), + } + } + + fn push(&mut self, mut lights: Vec) { + match self { + GpuPointLights::Uniform { buffer } => { + // NOTE: This iterator construction allows moving and padding with default + // values and is like this to avoid unnecessary cloning. + let gpu_point_lights = lights + .drain(..) 
+ .chain(std::iter::repeat_with(GpuPointLight::default)) + .take(MAX_UNIFORM_BUFFER_POINT_LIGHTS) + .collect::>(); + buffer.push(gpu_point_lights.try_into().unwrap()); + } + GpuPointLights::Storage { buffer } => { + buffer.append(&mut lights); + } + } + } + + fn write_buffer(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + match self { + GpuPointLights::Uniform { buffer } => buffer.write_buffer(render_device, render_queue), + GpuPointLights::Storage { buffer } => buffer.write_buffer(render_device, render_queue), + } + } + + pub fn binding(&self) -> Option { + match self { + GpuPointLights::Uniform { buffer } => buffer.binding(), + GpuPointLights::Storage { buffer } => buffer.binding(), + } + } + + pub fn len(&self) -> usize { + match self { + GpuPointLights::Uniform { buffer } => buffer.len(), + GpuPointLights::Storage { buffer } => buffer.values().len(), + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } } // NOTE: These must match the bit flags in bevy_pbr2/src/render/pbr.frag! @@ -144,7 +219,7 @@ pub struct GpuLights { } // NOTE: this must be kept in sync with the same constants in pbr.frag -pub const MAX_POINT_LIGHTS: usize = 256; +pub const MAX_UNIFORM_BUFFER_POINT_LIGHTS: usize = 256; // FIXME: How should we handle shadows for clustered forward? 
Limiting to maximum 10 // point light shadow maps for now #[cfg(feature = "webgl")] @@ -346,13 +421,13 @@ pub fn extract_clusters(mut commands: Commands, views: Query<(Entity, &Clusters) } } +#[allow(clippy::too_many_arguments)] pub fn extract_lights( mut commands: Commands, ambient_light: Res, point_light_shadow_map: Res, directional_light_shadow_map: Res, global_point_lights: Res, - // visible_point_lights: Query<&VisiblePointLights>, mut point_lights: Query<(&PointLight, &mut CubemapVisibleEntities, &GlobalTransform)>, mut directional_lights: Query<( Entity, @@ -361,6 +436,7 @@ pub fn extract_lights( &GlobalTransform, &Visibility, )>, + mut previous_point_lights_len: Local, ) { commands.insert_resource(ExtractedAmbientLight { color: ambient_light.color, @@ -379,32 +455,38 @@ pub fn extract_lights( // https://catlikecoding.com/unity/tutorials/custom-srp/point-and-spot-shadows/ let point_light_texel_size = 2.0 / point_light_shadow_map.size as f32; + let mut point_lights_values = Vec::with_capacity(*previous_point_lights_len); for entity in global_point_lights.iter().copied() { if let Ok((point_light, cubemap_visible_entities, transform)) = point_lights.get_mut(entity) { let render_cubemap_visible_entities = std::mem::take(cubemap_visible_entities.into_inner()); - commands.get_or_spawn(entity).insert_bundle(( - ExtractedPointLight { - color: point_light.color, - // NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian - // for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower - // for details. 
- intensity: point_light.intensity / (4.0 * std::f32::consts::PI), - range: point_light.range, - radius: point_light.radius, - transform: *transform, - shadows_enabled: point_light.shadows_enabled, - shadow_depth_bias: point_light.shadow_depth_bias, - // The factor of SQRT_2 is for the worst-case diagonal offset - shadow_normal_bias: point_light.shadow_normal_bias - * point_light_texel_size - * std::f32::consts::SQRT_2, - }, - render_cubemap_visible_entities, + point_lights_values.push(( + entity, + ( + ExtractedPointLight { + color: point_light.color, + // NOTE: Map from luminous power in lumens to luminous intensity in lumens per steradian + // for a point light. See https://google.github.io/filament/Filament.html#mjx-eqn-pointLightLuminousPower + // for details. + intensity: point_light.intensity / (4.0 * std::f32::consts::PI), + range: point_light.range, + radius: point_light.radius, + transform: *transform, + shadows_enabled: point_light.shadows_enabled, + shadow_depth_bias: point_light.shadow_depth_bias, + // The factor of SQRT_2 is for the worst-case diagonal offset + shadow_normal_bias: point_light.shadow_normal_bias + * point_light_texel_size + * std::f32::consts::SQRT_2, + }, + render_cubemap_visible_entities, + ), )); } } + *previous_point_lights_len = point_lights_values.len(); + commands.insert_or_spawn_batch(point_lights_values); for (entity, directional_light, visible_entities, transform, visibility) in directional_lights.iter_mut() @@ -528,12 +610,34 @@ pub struct ViewLightsUniformOffset { pub offset: u32, } -#[derive(Default)] +// NOTE: Clustered-forward rendering requires 3 storage buffer bindings so check that +// at least that many are supported using this constant and SupportedBindingType::from_device() +pub const CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT: u32 = 3; + pub struct GlobalLightMeta { - pub gpu_point_lights: UniformVec, + pub gpu_point_lights: GpuPointLights, pub entity_to_index: HashMap, } +impl FromWorld for GlobalLightMeta { + fn 
from_world(world: &mut World) -> Self { + Self::new( + world + .resource::() + .get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT), + ) + } +} + +impl GlobalLightMeta { + pub fn new(buffer_binding_type: BufferBindingType) -> Self { + Self { + gpu_point_lights: GpuPointLights::new(buffer_binding_type), + entity_to_index: HashMap::default(), + } + } +} + #[derive(Default)] pub struct LightMeta { pub view_gpu_lights: DynamicUniformVec, @@ -615,14 +719,14 @@ pub fn prepare_lights( .reserve(point_lights.len()); } - let mut gpu_point_lights = [GpuPointLight::default(); MAX_POINT_LIGHTS]; + let mut gpu_point_lights = Vec::new(); for (index, &(entity, light)) in point_lights.iter().enumerate() { let mut flags = PointLightFlags::NONE; // Lights are sorted, shadow enabled lights are first if light.shadows_enabled && index < MAX_POINT_LIGHT_SHADOW_MAPS { flags |= PointLightFlags::SHADOWS_ENABLED; } - gpu_point_lights[index] = GpuPointLight { + gpu_point_lights.push(GpuPointLight { projection_lr: Vec4::new( cube_face_projection.z_axis.z, cube_face_projection.z_axis.w, @@ -639,12 +743,10 @@ pub fn prepare_lights( flags: flags.bits, shadow_depth_bias: light.shadow_depth_bias, shadow_normal_bias: light.shadow_normal_bias, - }; + }); global_light_meta.entity_to_index.insert(entity, index); } - global_light_meta.gpu_point_lights.push(GpuPointLights { - data: gpu_point_lights, - }); + global_light_meta.gpu_point_lights.push(gpu_point_lights); global_light_meta .gpu_point_lights .write_buffer(&render_device, &render_queue); @@ -906,7 +1008,7 @@ pub fn prepare_lights( } // this must match CLUSTER_COUNT_SIZE in pbr.wgsl -// and must be large enough to contain MAX_POINT_LIGHTS +// and must be large enough to contain MAX_UNIFORM_BUFFER_POINT_LIGHTS const CLUSTER_COUNT_SIZE: u32 = 13; const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - CLUSTER_COUNT_SIZE)) - 1; @@ -931,14 +1033,47 @@ fn pack_offset_and_count(offset: usize, count: usize) -> u32 { | (count as u32 & 
CLUSTER_COUNT_MASK) } -#[derive(Component, Default)] +enum ViewClusterBuffers { + Uniform { + // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment + cluster_light_index_lists: UniformVec<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, + // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment + cluster_offsets_and_counts: UniformVec<[UVec4; ViewClusterBindings::MAX_UNIFORM_ITEMS]>, + }, + Storage { + cluster_light_index_lists: StorageBuffer, + cluster_offsets_and_counts: StorageBuffer, + }, +} + +impl ViewClusterBuffers { + fn new(buffer_binding_type: BufferBindingType) -> Self { + match buffer_binding_type { + BufferBindingType::Storage { .. } => Self::storage(), + BufferBindingType::Uniform => Self::uniform(), + } + } + + fn uniform() -> Self { + ViewClusterBuffers::Uniform { + cluster_light_index_lists: UniformVec::default(), + cluster_offsets_and_counts: UniformVec::default(), + } + } + + fn storage() -> Self { + ViewClusterBuffers::Storage { + cluster_light_index_lists: StorageBuffer::default(), + cluster_offsets_and_counts: StorageBuffer::default(), + } + } +} + +#[derive(Component)] pub struct ViewClusterBindings { n_indices: usize, - // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment - pub cluster_light_index_lists: UniformVec<[UVec4; Self::MAX_UNIFORM_ITEMS]>, n_offsets: usize, - // NOTE: UVec4 is because all arrays in Std140 layout have 16-byte alignment - pub cluster_offsets_and_counts: UniformVec<[UVec4; Self::MAX_UNIFORM_ITEMS]>, + buffers: ViewClusterBuffers, } impl ViewClusterBindings { @@ -946,25 +1081,59 @@ impl ViewClusterBindings { const MAX_UNIFORM_ITEMS: usize = Self::MAX_OFFSETS / 4; pub const MAX_INDICES: usize = 16384; + pub fn new(buffer_binding_type: BufferBindingType) -> Self { + Self { + n_indices: 0, + n_offsets: 0, + buffers: ViewClusterBuffers::new(buffer_binding_type), + } + } + pub fn reserve_and_clear(&mut self) { - self.cluster_light_index_lists.clear(); - 
self.cluster_light_index_lists - .push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); - self.cluster_offsets_and_counts.clear(); - self.cluster_offsets_and_counts - .push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.clear(); + cluster_light_index_lists.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); + cluster_offsets_and_counts.clear(); + cluster_offsets_and_counts.push([UVec4::ZERO; Self::MAX_UNIFORM_ITEMS]); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + cluster_offsets_and_counts, + .. + } => { + cluster_light_index_lists.clear(); + cluster_offsets_and_counts.clear(); + } + } } pub fn push_offset_and_count(&mut self, offset: usize, count: usize) { - let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4 - if array_index >= Self::MAX_UNIFORM_ITEMS { - warn!("cluster offset and count out of bounds!"); - return; - } - let component = self.n_offsets & ((1 << 2) - 1); - let packed = pack_offset_and_count(offset, count); + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_offsets_and_counts, + .. + } => { + let array_index = self.n_offsets >> 2; // >> 2 is equivalent to / 4 + if array_index >= Self::MAX_UNIFORM_ITEMS { + warn!("cluster offset and count out of bounds!"); + return; + } + let component = self.n_offsets & ((1 << 2) - 1); + let packed = pack_offset_and_count(offset, count); - self.cluster_offsets_and_counts.get_mut(0)[array_index][component] = packed; + cluster_offsets_and_counts.get_mut(0)[array_index][component] = packed; + } + ViewClusterBuffers::Storage { + cluster_offsets_and_counts, + .. 
+ } => { + cluster_offsets_and_counts.push(UVec2::new(offset as u32, count as u32)); + } + } self.n_offsets += 1; } @@ -974,22 +1143,81 @@ impl ViewClusterBindings { } pub fn push_index(&mut self, index: usize) { - let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16 - let component = (self.n_indices >> 2) & ((1 << 2) - 1); - let sub_index = self.n_indices & ((1 << 2) - 1); - let index = index as u32 & POINT_LIGHT_INDEX_MASK; - - self.cluster_light_index_lists.get_mut(0)[array_index][component] |= - index << (8 * sub_index); + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + .. + } => { + let array_index = self.n_indices >> 4; // >> 4 is equivalent to / 16 + let component = (self.n_indices >> 2) & ((1 << 2) - 1); + let sub_index = self.n_indices & ((1 << 2) - 1); + let index = index as u32 & POINT_LIGHT_INDEX_MASK; + + cluster_light_index_lists.get_mut(0)[array_index][component] |= + index << (8 * sub_index); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + .. + } => { + cluster_light_index_lists.push(index as u32); + } + } self.n_indices += 1; } + + pub fn write_buffers(&mut self, render_device: &RenderDevice, render_queue: &RenderQueue) { + match &mut self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.write_buffer(render_device, render_queue); + cluster_offsets_and_counts.write_buffer(render_device, render_queue); + } + ViewClusterBuffers::Storage { + cluster_light_index_lists, + cluster_offsets_and_counts, + } => { + cluster_light_index_lists.write_buffer(render_device, render_queue); + cluster_offsets_and_counts.write_buffer(render_device, render_queue); + } + } + } + + pub fn light_index_lists_binding(&self) -> Option { + match &self.buffers { + ViewClusterBuffers::Uniform { + cluster_light_index_lists, + .. 
+ } => cluster_light_index_lists.binding(), + ViewClusterBuffers::Storage { + cluster_light_index_lists, + .. + } => cluster_light_index_lists.binding(), + } + } + + pub fn offsets_and_counts_binding(&self) -> Option { + match &self.buffers { + ViewClusterBuffers::Uniform { + cluster_offsets_and_counts, + .. + } => cluster_offsets_and_counts.binding(), + ViewClusterBuffers::Storage { + cluster_offsets_and_counts, + .. + } => cluster_offsets_and_counts.binding(), + } + } } pub fn prepare_clusters( mut commands: Commands, render_device: Res, render_queue: Res, + mesh_pipeline: Res, global_light_meta: Res, views: Query< ( @@ -1000,8 +1228,14 @@ pub fn prepare_clusters( With>, >, ) { + let render_device = render_device.into_inner(); + let supports_storage_buffers = matches!( + mesh_pipeline.clustered_forward_buffer_binding_type, + BufferBindingType::Storage { .. } + ); for (entity, cluster_config, extracted_clusters) in views.iter() { - let mut view_clusters_bindings = ViewClusterBindings::default(); + let mut view_clusters_bindings = + ViewClusterBindings::new(mesh_pipeline.clustered_forward_buffer_binding_type); view_clusters_bindings.reserve_and_clear(); let mut indices_full = false; @@ -1021,6 +1255,7 @@ pub fn prepare_clusters( { if view_clusters_bindings.n_indices() >= ViewClusterBindings::MAX_INDICES + && !supports_storage_buffers { warn!("Cluster light index lists is full! 
The PointLights in the view are affecting too many clusters."); indices_full = true; @@ -1036,12 +1271,7 @@ pub fn prepare_clusters( } } - view_clusters_bindings - .cluster_light_index_lists - .write_buffer(&render_device, &render_queue); - view_clusters_bindings - .cluster_offsets_and_counts - .write_buffer(&render_device, &render_queue); + view_clusters_bindings.write_buffers(render_device, &render_queue); commands.get_or_spawn(entity).insert(view_clusters_bindings); } diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 08ac42b83865d..7743524eea7f3 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -1,6 +1,7 @@ use crate::{ GlobalLightMeta, GpuLights, LightMeta, NotShadowCaster, NotShadowReceiver, ShadowPipeline, ViewClusterBindings, ViewLightsUniformOffset, ViewShadowBindings, + CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, }; use bevy_app::Plugin; use bevy_asset::{load_internal_asset, Assets, Handle, HandleUntyped}; @@ -258,11 +259,18 @@ pub struct MeshPipeline { pub skinned_mesh_layout: BindGroupLayout, // This dummy white texture is to be used in place of optional StandardMaterial textures pub dummy_white_gpu_image: GpuImage, + pub clustered_forward_buffer_binding_type: BufferBindingType, } impl FromWorld for MeshPipeline { fn from_world(world: &mut World) -> Self { let render_device = world.resource::(); + let clustered_forward_buffer_binding_type = render_device + .get_supported_read_only_binding_type(CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT); + let cluster_min_binding_size = match clustered_forward_buffer_binding_type { + BufferBindingType::Storage { .. 
} => None, + BufferBindingType::Uniform => BufferSize::new(16384), + }; let view_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor { entries: &[ // View @@ -334,11 +342,12 @@ impl FromWorld for MeshPipeline { binding: 6, visibility: ShaderStages::FRAGMENT, ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, + ty: clustered_forward_buffer_binding_type, has_dynamic_offset: false, - // NOTE: Static size for uniform buffers. GpuPointLight has a padded - // size of 64 bytes, so 16384 / 64 = 256 point lights max - min_binding_size: BufferSize::new(16384), + // NOTE (when no storage buffers): Static size for uniform buffers. + // GpuPointLight has a padded size of 64 bytes, so 16384 / 64 = 256 + // point lights max + min_binding_size: cluster_min_binding_size, }, count: None, }, @@ -347,10 +356,11 @@ impl FromWorld for MeshPipeline { binding: 7, visibility: ShaderStages::FRAGMENT, ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, + ty: clustered_forward_buffer_binding_type, has_dynamic_offset: false, - // NOTE: With 256 point lights max, indices need 8 bits so use u8 - min_binding_size: BufferSize::new(16384), + // NOTE (when no storage buffers): With 256 point lights max, indices + // need 8 bits so use u8 + min_binding_size: cluster_min_binding_size, }, count: None, }, @@ -359,13 +369,14 @@ impl FromWorld for MeshPipeline { binding: 8, visibility: ShaderStages::FRAGMENT, ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, + ty: clustered_forward_buffer_binding_type, has_dynamic_offset: false, - // NOTE: The offset needs to address 16384 indices, which needs 14 bits. - // The count can be at most all 256 lights so 8 bits. - // Pack the offset into the upper 24 bits and the count into the - // lower 8 bits. - min_binding_size: BufferSize::new(16384), + // NOTE (when no storage buffers): The offset needs to address 16384 + // indices, which needs 14 bits. The count can be at most all 256 lights + // so 8 bits. 
+ // NOTE: Pack the offset into the upper 19 bits and the count into the + // lower 13 bits. + min_binding_size: cluster_min_binding_size, }, count: None, }, @@ -457,6 +468,7 @@ impl FromWorld for MeshPipeline { view_layout, mesh_layout, skinned_mesh_layout, + clustered_forward_buffer_binding_type, dummy_white_gpu_image, } } @@ -548,6 +560,18 @@ impl SpecializedMeshPipeline for MeshPipeline { vertex_attributes.push(Mesh::ATTRIBUTE_TANGENT.at_shader_location(3)); } + // TODO: consider exposing this in shaders in a more generally useful way, such as: + // # if AVAILABLE_STORAGE_BUFFER_BINDINGS == 3 + // /* use storage buffers here */ + // # elif + // /* use uniforms here */ + if !matches!( + self.clustered_forward_buffer_binding_type, + BufferBindingType::Storage { .. } + ) { + shader_defs.push(String::from("NO_STORAGE_BUFFERS_SUPPORT")); + } + let mut bind_group_layout = vec![self.view_layout.clone()]; if layout.contains(Mesh::ATTRIBUTE_JOINT_INDEX) && layout.contains(Mesh::ATTRIBUTE_JOINT_WEIGHT) @@ -770,17 +794,11 @@ pub fn queue_mesh_view_bind_groups( }, BindGroupEntry { binding: 7, - resource: view_cluster_bindings - .cluster_light_index_lists - .binding() - .unwrap(), + resource: view_cluster_bindings.light_index_lists_binding().unwrap(), }, BindGroupEntry { binding: 8, - resource: view_cluster_bindings - .cluster_offsets_and_counts - .binding() - .unwrap(), + resource: view_cluster_bindings.offsets_and_counts_binding().unwrap(), }, ], label: Some("mesh_view_bind_group"), diff --git a/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl b/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl index fd1d1dec4d79d..581334676f2cd 100644 --- a/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl +++ b/crates/bevy_pbr/src/render/mesh_view_bind_group.wgsl @@ -57,20 +57,30 @@ struct Lights { n_directional_lights: u32; }; +#ifdef NO_STORAGE_BUFFERS_SUPPORT struct PointLights { data: array; }; - struct ClusterLightIndexLists { // each u32 contains 4 u8 indices into the 
PointLights array data: array, 1024u>; }; - struct ClusterOffsetsAndCounts { // each u32 contains a 24-bit index into ClusterLightIndexLists in the high 24 bits // and an 8-bit count of the number of lights in the low 8 bits data: array, 1024u>; }; +#else +struct PointLights { + data: array; +}; +struct ClusterLightIndexLists { + data: array; +}; +struct ClusterOffsetsAndCounts { + data: array>; +}; +#endif [[group(0), binding(0)]] var view: View; @@ -94,9 +104,19 @@ var directional_shadow_textures: texture_depth_2d_array; #endif [[group(0), binding(5)]] var directional_shadow_textures_sampler: sampler_comparison; + +#ifdef NO_STORAGE_BUFFERS_SUPPORT [[group(0), binding(6)]] var point_lights: PointLights; [[group(0), binding(7)]] var cluster_light_index_lists: ClusterLightIndexLists; [[group(0), binding(8)]] var cluster_offsets_and_counts: ClusterOffsetsAndCounts; +#else +[[group(0), binding(6)]] +var point_lights: PointLights; +[[group(0), binding(7)]] +var cluster_light_index_lists: ClusterLightIndexLists; +[[group(0), binding(8)]] +var cluster_offsets_and_counts: ClusterOffsetsAndCounts; +#endif diff --git a/crates/bevy_pbr/src/render/pbr.wgsl b/crates/bevy_pbr/src/render/pbr.wgsl index 9a7854ce32cfc..8cb483173c9b0 100644 --- a/crates/bevy_pbr/src/render/pbr.wgsl +++ b/crates/bevy_pbr/src/render/pbr.wgsl @@ -264,29 +264,32 @@ fn fragment_cluster_index(frag_coord: vec2, view_z: f32, is_orthographic: b ); } -struct ClusterOffsetAndCount { - offset: u32; - count: u32; -}; - // this must match CLUSTER_COUNT_SIZE in light.rs let CLUSTER_COUNT_SIZE = 13u; -fn unpack_offset_and_count(cluster_index: u32) -> ClusterOffsetAndCount { +fn unpack_offset_and_count(cluster_index: u32) -> vec2 { +#ifdef NO_STORAGE_BUFFERS_SUPPORT let offset_and_count = cluster_offsets_and_counts.data[cluster_index >> 2u][cluster_index & ((1u << 2u) - 1u)]; - var output: ClusterOffsetAndCount; - // The offset is stored in the upper 24 bits - output.offset = (offset_and_count >> 
CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u); - // The count is stored in the lower 8 bits - output.count = offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u); - return output; + return vec2( + // The offset is stored in the upper 32 - CLUSTER_COUNT_SIZE = 19 bits + (offset_and_count >> CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u), + // The count is stored in the lower CLUSTER_COUNT_SIZE = 13 bits + offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u) + ); +#else + return cluster_offsets_and_counts.data[cluster_index]; +#endif } fn get_light_id(index: u32) -> u32 { +#ifdef NO_STORAGE_BUFFERS_SUPPORT // The index is correct but in cluster_light_index_lists we pack 4 u8s into a u32 // This means the index into cluster_light_index_lists is index / 4 let indices = cluster_light_index_lists.data[index >> 4u][(index >> 2u) & ((1u << 2u) - 1u)]; // And index % 4 gives the sub-index of the u8 within the u32 so we shift by 8 * sub-index return (indices >> (8u * (index & ((1u << 2u) - 1u)))) & ((1u << 8u) - 1u); +#else + return cluster_light_index_lists.data[index]; +#endif } fn point_light( @@ -583,7 +586,7 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4 { ), in.world_position); let cluster_index = fragment_cluster_index(in.frag_coord.xy, view_z, is_orthographic); let offset_and_count = unpack_offset_and_count(cluster_index); - for (var i: u32 = offset_and_count.offset; i < offset_and_count.offset + offset_and_count.count; i = i + 1u) { + for (var i: u32 = offset_and_count[0]; i < offset_and_count[0] + offset_and_count[1]; i = i + 1u) { let light_id = get_light_id(i); let light = point_lights.data[light_id]; var shadow: f32 = 1.0; @@ -637,9 +640,9 @@ fn fragment(in: FragmentInput) -> [[location(0)]] vec4 { let cluster_overlay_alpha = 0.1; let max_light_complexity_per_cluster = 64.0; output_color.r = (1.0 - cluster_overlay_alpha) * output_color.r - + cluster_overlay_alpha * smoothStep(0.0, max_light_complexity_per_cluster, 
f32(offset_and_count.count)); + + cluster_overlay_alpha * smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count[1])); output_color.g = (1.0 - cluster_overlay_alpha) * output_color.g - + cluster_overlay_alpha * (1.0 - smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count.count))); + + cluster_overlay_alpha * (1.0 - smoothStep(0.0, max_light_complexity_per_cluster, f32(offset_and_count[1]))); #endif // CLUSTERED_FORWARD_DEBUG_CLUSTER_LIGHT_COMPLEXITY #ifdef CLUSTERED_FORWARD_DEBUG_CLUSTER_COHERENCY // NOTE: Visualizes the cluster to which the fragment belongs diff --git a/crates/bevy_render/src/render_resource/storage_buffer.rs b/crates/bevy_render/src/render_resource/storage_buffer.rs index 874f1527a1540..b0015f1c3d941 100644 --- a/crates/bevy_render/src/render_resource/storage_buffer.rs +++ b/crates/bevy_render/src/render_resource/storage_buffer.rs @@ -1,13 +1,10 @@ -use std::num::NonZeroU64; - +use super::Buffer; +use crate::renderer::{RenderDevice, RenderQueue}; use bevy_crevice::std430::{self, AsStd430, Std430}; use bevy_utils::tracing::warn; +use std::num::NonZeroU64; use wgpu::{BindingResource, BufferBinding, BufferDescriptor, BufferUsages}; -use crate::renderer::{RenderDevice, RenderQueue}; - -use super::Buffer; - /// A helper for a storage buffer binding with a body, or a variable-sized array, or both. 
pub struct StorageBuffer { body: U, @@ -126,4 +123,19 @@ impl StorageBuffer { pub fn values_mut(&mut self) -> &mut [T] { &mut self.values } + + #[inline] + pub fn clear(&mut self) { + self.values.clear(); + } + + #[inline] + pub fn push(&mut self, value: T) { + self.values.push(value); + } + + #[inline] + pub fn append(&mut self, values: &mut Vec) { + self.values.append(values); + } } diff --git a/crates/bevy_render/src/renderer/render_device.rs b/crates/bevy_render/src/renderer/render_device.rs index 7ff9e88ef6dd0..4430e7001273f 100644 --- a/crates/bevy_render/src/renderer/render_device.rs +++ b/crates/bevy_render/src/renderer/render_device.rs @@ -4,7 +4,7 @@ use crate::render_resource::{ }; use futures_lite::future; use std::sync::Arc; -use wgpu::util::DeviceExt; +use wgpu::{util::DeviceExt, BufferBindingType}; use super::RenderQueue; @@ -184,4 +184,15 @@ impl RenderDevice { let padded_bytes_per_row_padding = (align - row_bytes % align) % align; row_bytes + padded_bytes_per_row_padding } + + pub fn get_supported_read_only_binding_type( + &self, + buffers_per_shader_stage: u32, + ) -> BufferBindingType { + if self.limits().max_storage_buffers_per_shader_stage >= buffers_per_shader_stage { + BufferBindingType::Storage { read_only: true } + } else { + BufferBindingType::Uniform + } + } } diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index 6a900c4604d50..44fcbfbf558e2 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -86,7 +86,11 @@ impl SpecializedMaterial2d for M { type Key = (); #[inline] - fn key(_material: &::PreparedAsset) -> Self::Key {} + fn key( + _render_device: &RenderDevice, + _material: &::PreparedAsset, + ) -> Self::Key { + } #[inline] fn specialize( @@ -136,7 +140,10 @@ pub trait SpecializedMaterial2d: Asset + RenderAsset { /// Extract the [`SpecializedMaterial2d::Key`] for the "prepared" version of this material. 
This key will be /// passed in to the [`SpecializedMaterial2d::specialize`] function when compiling the [`RenderPipeline`](bevy_render::render_resource::RenderPipeline) /// for a given entity's material. - fn key(material: &::PreparedAsset) -> Self::Key; + fn key( + render_device: &RenderDevice, + material: &::PreparedAsset, + ) -> Self::Key; /// Specializes the given `descriptor` according to the given `key`. fn specialize( @@ -292,6 +299,7 @@ pub fn queue_material2d_meshes( material2d_pipeline: Res>, mut pipelines: ResMut>>, mut pipeline_cache: ResMut, + render_device: Res, msaa: Res, render_meshes: Res>, render_materials: Res>, @@ -301,6 +309,7 @@ pub fn queue_material2d_meshes( if material2d_meshes.is_empty() { return; } + let render_device = render_device.into_inner(); for (visible_entities, mut transparent_phase) in views.iter_mut() { let draw_transparent_pbr = transparent_draw_functions .read() @@ -318,7 +327,7 @@ pub fn queue_material2d_meshes( let mesh_key = msaa_key | Mesh2dPipelineKey::from_primitive_topology(mesh.primitive_topology); - let material_key = M::key(material2d); + let material_key = M::key(render_device, material2d); let pipeline_id = pipelines.specialize( &mut pipeline_cache, &material2d_pipeline, diff --git a/examples/README.md b/examples/README.md index 314a3244ea0eb..d560eddd27521 100644 --- a/examples/README.md +++ b/examples/README.md @@ -443,4 +443,5 @@ cargo run --release --example Example | File | Description --- | --- | --- +`many_lights` | [`stress_tests/many_lights.rs`](./stress_tests/many_lights.rs) | Simple benchmark to test rendering many point lights. Run with `WGPU_SETTINGS_PRIO=webgl2` to restrict to uniform buffers and max 256 lights. 
`transform_hierarchy.rs` | [`stress_tests/transform_hierarchy.rs`](./stress_tests/transform_hierarchy.rs) | Various test cases for hierarchy and transform propagation performance diff --git a/examples/shader/custom_vertex_attribute.rs b/examples/shader/custom_vertex_attribute.rs index 101b09d80937a..44ef9d0a18553 100644 --- a/examples/shader/custom_vertex_attribute.rs +++ b/examples/shader/custom_vertex_attribute.rs @@ -137,6 +137,7 @@ impl Material for CustomMaterial { } fn specialize( + _pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, layout: &MeshVertexBufferLayout, ) -> Result<(), SpecializedMeshPipelineError> { diff --git a/examples/shader/shader_material_glsl.rs b/examples/shader/shader_material_glsl.rs index 05e8a659fe4b3..7e405157ce99f 100644 --- a/examples/shader/shader_material_glsl.rs +++ b/examples/shader/shader_material_glsl.rs @@ -97,6 +97,7 @@ impl SpecializedMaterial for CustomMaterial { fn key(_: &::PreparedAsset) -> Self::Key {} fn specialize( + _pipeline: &MaterialPipeline, descriptor: &mut RenderPipelineDescriptor, _: Self::Key, _layout: &MeshVertexBufferLayout, diff --git a/examples/stress_tests/many_lights.rs b/examples/stress_tests/many_lights.rs new file mode 100644 index 0000000000000..6134b459de082 --- /dev/null +++ b/examples/stress_tests/many_lights.rs @@ -0,0 +1,166 @@ +use bevy::{ + diagnostic::{FrameTimeDiagnosticsPlugin, LogDiagnosticsPlugin}, + math::{DVec2, DVec3}, + pbr::{ExtractedPointLight, GlobalLightMeta}, + prelude::*, + render::{RenderApp, RenderStage}, +}; + +fn main() { + App::new() + .insert_resource(WindowDescriptor { + width: 1024.0, + height: 768.0, + title: "many_lights".to_string(), + present_mode: bevy::window::PresentMode::Immediate, + ..default() + }) + .add_plugins(DefaultPlugins) + .add_plugin(FrameTimeDiagnosticsPlugin::default()) + .add_plugin(LogDiagnosticsPlugin::default()) + .add_startup_system(setup) + .add_system(move_camera) + .add_system(print_light_count) + 
.add_plugin(LogVisibleLights) + .run(); +} + +fn setup( + mut commands: Commands, + mut meshes: ResMut>, + mut materials: ResMut>, +) { + const LIGHT_RADIUS: f32 = 0.3; + const LIGHT_INTENSITY: f32 = 5.0; + const RADIUS: f32 = 50.0; + const N_LIGHTS: usize = 100_000; + + commands.spawn_bundle(PbrBundle { + mesh: meshes.add(Mesh::from(shape::Icosphere { + radius: RADIUS, + subdivisions: 9, + })), + material: materials.add(StandardMaterial::from(Color::WHITE)), + transform: Transform::from_scale(Vec3::splat(-1.0)), + ..default() + }); + + let mesh = meshes.add(Mesh::from(shape::Cube { size: 1.0 })); + let material = materials.add(StandardMaterial { + base_color: Color::PINK, + ..default() + }); + + // NOTE: This pattern is good for testing performance of culling as it provides roughly + // the same number of visible meshes regardless of the viewing angle. + // NOTE: f64 is used to avoid precision issues that produce visual artifacts in the distribution + let golden_ratio = 0.5f64 * (1.0f64 + 5.0f64.sqrt()); + for i in 0..N_LIGHTS { + let spherical_polar_theta_phi = fibonacci_spiral_on_sphere(golden_ratio, i, N_LIGHTS); + let unit_sphere_p = spherical_polar_to_cartesian(spherical_polar_theta_phi); + commands.spawn_bundle(PointLightBundle { + point_light: PointLight { + range: LIGHT_RADIUS, + intensity: LIGHT_INTENSITY, + ..default() + }, + transform: Transform::from_translation((RADIUS as f64 * unit_sphere_p).as_vec3()), + ..default() + }); + } + + // camera + commands.spawn_bundle(PerspectiveCameraBundle::default()); + + // add one cube, the only one with strong handles + // also serves as a reference point during rotation + commands.spawn_bundle(PbrBundle { + mesh, + material, + transform: Transform { + translation: Vec3::new(0.0, RADIUS as f32, 0.0), + scale: Vec3::splat(5.0), + ..default() + }, + ..default() + }); +} + +// NOTE: This epsilon value is apparently optimal for optimizing for the average +// nearest-neighbor distance. 
See: +// http://extremelearning.com.au/how-to-evenly-distribute-points-on-a-sphere-more-effectively-than-the-canonical-fibonacci-lattice/ +// for details. +const EPSILON: f64 = 0.36; +fn fibonacci_spiral_on_sphere(golden_ratio: f64, i: usize, n: usize) -> DVec2 { + DVec2::new( + 2.0 * std::f64::consts::PI * (i as f64 / golden_ratio), + (1.0 - 2.0 * (i as f64 + EPSILON) / (n as f64 - 1.0 + 2.0 * EPSILON)).acos(), + ) +} + +fn spherical_polar_to_cartesian(p: DVec2) -> DVec3 { + let (sin_theta, cos_theta) = p.x.sin_cos(); + let (sin_phi, cos_phi) = p.y.sin_cos(); + DVec3::new(cos_theta * sin_phi, sin_theta * sin_phi, cos_phi) +} + +// System for rotating the camera +fn move_camera(time: Res