fix issues with too many point lights (bevyengine#3916)
# Objective

Fix bevyengine#3915.

## Solution

The issues are caused by:
- lights are assigned to clusters before being filtered down to MAX_POINT_LIGHTS, leading to cluster counts potentially being too high
- after fixing the above, packing the count into 8 bits still overflows when exactly 256 lights affect a cluster

To fix:

`assign_lights_to_clusters`
- limit extracted lights to MAX_POINT_LIGHTS, selecting based on shadow-caster & intensity (if required)
- warn if the MAX_POINT_LIGHTS count is exceeded

`prepare_lights`
- limit the lights assigned to a cluster to CLUSTER_COUNT_MASK (which is 1 less than MAX_POINT_LIGHTS) to avoid overflowing into the offset bits

Notes:
- a better solution to the overflow may be to use more than 8 bits for cluster_count (the comment states only 14 of the remaining 24 bits are used for the offset). This would touch more of the code base, but I'm happy to try it if it has some benefit.
- intensity is only one way to select lights. It may be worth allowing user configuration of the light filtering, but I can't see a clean way to do that.
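
For concreteness, a standalone sketch (not part of this PR) of the wrap-around described under Solution: an 8-bit count field cannot hold a cluster that is touched by all 256 lights, while a 13-bit field can.

```rust
// Minimal illustration of the count-field overflow; assumes MAX_POINT_LIGHTS = 256.
fn main() {
    let count: u32 = 256; // every point light affects the same cluster

    let old_count_field = count & ((1 << 8) - 1); // old layout: count in the lower 8 bits
    let new_count_field = count & ((1 << 13) - 1); // new layout: count in the lower 13 bits

    // 256 = 0b1_0000_0000 needs 9 bits, so the old packing wrapped it to 0.
    assert_eq!(old_count_field, 0);
    assert_eq!(new_count_field, 256);
    println!("8-bit field: {}, 13-bit field: {}", old_count_field, new_count_field);
}
```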
robtfm authored and Ku95 committed Mar 6, 2022
1 parent e6beb2d commit ff4a037
Showing 3 changed files with 111 additions and 26 deletions.
### crates/bevy_pbr/src/light.rs (88 additions, 9 deletions)

```diff
@@ -12,11 +12,12 @@ use bevy_render::{
     view::{ComputedVisibility, RenderLayers, Visibility, VisibleEntities},
 };
 use bevy_transform::components::GlobalTransform;
+use bevy_utils::tracing::warn;
 use bevy_window::Windows;
 
 use crate::{
     calculate_cluster_factors, CubeMapFace, CubemapVisibleEntities, ViewClusterBindings,
-    CUBE_MAP_FACES, POINT_LIGHT_NEAR_Z,
+    CUBE_MAP_FACES, MAX_POINT_LIGHTS, POINT_LIGHT_NEAR_Z,
 };
 
 /// A light that emits light in all directions from a central point.
@@ -478,14 +479,91 @@ fn ndc_position_to_cluster(
         .clamp(UVec3::ZERO, cluster_dimensions - UVec3::ONE)
 }
 
+// Sort point lights with shadows enabled first, then by a stable key so that the index
+// can be used to render at most `MAX_POINT_LIGHT_SHADOW_MAPS` point light shadows, and
+// we keep a stable set of lights visible
+pub(crate) fn point_light_order(
+    (entity_1, shadows_enabled_1): (&Entity, &bool),
+    (entity_2, shadows_enabled_2): (&Entity, &bool),
+) -> std::cmp::Ordering {
+    shadows_enabled_1
+        .cmp(shadows_enabled_2)
+        .reverse()
+        .then_with(|| entity_1.cmp(entity_2))
+}
+
+#[derive(Clone, Copy)]
+// data required for assigning lights to clusters
+pub(crate) struct PointLightAssignmentData {
+    entity: Entity,
+    translation: Vec3,
+    range: f32,
+    shadows_enabled: bool,
+}
+
 // NOTE: Run this before update_point_light_frusta!
-pub fn assign_lights_to_clusters(
+pub(crate) fn assign_lights_to_clusters(
     mut commands: Commands,
     mut global_lights: ResMut<VisiblePointLights>,
     mut views: Query<(Entity, &GlobalTransform, &Camera, &Frustum, &mut Clusters)>,
-    lights: Query<(Entity, &GlobalTransform, &PointLight)>,
+    lights_query: Query<(Entity, &GlobalTransform, &PointLight)>,
+    mut lights: Local<Vec<PointLightAssignmentData>>,
+    mut max_point_lights_warning_emitted: Local<bool>,
 ) {
-    let light_count = lights.iter().count();
+    // collect just the relevant light query data into a persisted vec to avoid reallocating each frame
+    lights.extend(
+        lights_query
+            .iter()
+            .map(|(entity, transform, light)| PointLightAssignmentData {
+                entity,
+                translation: transform.translation,
+                shadows_enabled: light.shadows_enabled,
+                range: light.range,
+            }),
+    );
+
+    if lights.len() > MAX_POINT_LIGHTS {
+        lights.sort_by(|light_1, light_2| {
+            point_light_order(
+                (&light_1.entity, &light_1.shadows_enabled),
+                (&light_2.entity, &light_2.shadows_enabled),
+            )
+        });
+
+        // check each light against each view's frustum, keep only those that affect at least one of our views
+        let frusta: Vec<_> = views.iter().map(|(_, _, _, frustum, _)| *frustum).collect();
+        let mut lights_in_view_count = 0;
+        lights.retain(|light| {
+            // take one extra light to check if we should emit the warning
+            if lights_in_view_count == MAX_POINT_LIGHTS + 1 {
+                false
+            } else {
+                let light_sphere = Sphere {
+                    center: light.translation,
+                    radius: light.range,
+                };
+
+                let light_in_view = frusta
+                    .iter()
+                    .any(|frustum| frustum.intersects_sphere(&light_sphere));
+
+                if light_in_view {
+                    lights_in_view_count += 1;
+                }
+
+                light_in_view
+            }
+        });
+
+        if lights.len() > MAX_POINT_LIGHTS && !*max_point_lights_warning_emitted {
+            warn!("MAX_POINT_LIGHTS ({}) exceeded", MAX_POINT_LIGHTS);
+            *max_point_lights_warning_emitted = true;
+        }
+
+        lights.truncate(MAX_POINT_LIGHTS);
+    }
+
+    let light_count = lights.len();
     let mut global_lights_set = HashSet::with_capacity(light_count);
     for (view_entity, view_transform, camera, frustum, mut clusters) in views.iter_mut() {
         let view_transform = view_transform.compute_matrix();
@@ -504,9 +582,9 @@ pub fn assign_lights_to_clusters(
             vec![VisiblePointLights::from_light_count(light_count); cluster_count];
         let mut visible_lights = Vec::with_capacity(light_count);
 
-        for (light_entity, light_transform, light) in lights.iter() {
+        for light in lights.iter() {
             let light_sphere = Sphere {
-                center: light_transform.translation,
+                center: light.translation,
                 radius: light.range,
             };
 
@@ -516,8 +594,8 @@ pub fn assign_lights_to_clusters(
            }
 
             // NOTE: The light intersects the frustum so it must be visible and part of the global set
-            global_lights_set.insert(light_entity);
-            visible_lights.push(light_entity);
+            global_lights_set.insert(light.entity);
+            visible_lights.push(light.entity);
 
             // Calculate an AABB for the light in view space, find the corresponding clusters for the min and max
             // points of the AABB, then iterate over just those clusters for this light
@@ -599,7 +677,7 @@ pub fn assign_lights_to_clusters(
                         let cluster_index = (col_offset + z) as usize;
                         let cluster_aabb = &clusters.aabbs[cluster_index];
                         if light_sphere.intersects_obb(cluster_aabb, &view_transform) {
-                            clusters_lights[cluster_index].entities.push(light_entity);
+                            clusters_lights[cluster_index].entities.push(light.entity);
                         }
                     }
                 }
@@ -617,6 +695,7 @@ pub fn assign_lights_to_clusters(
         });
     }
     global_lights.entities = global_lights_set.into_iter().collect();
+    lights.clear();
 }
 
 pub fn update_directional_light_frusta(
```
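
As an aside (this snippet is not in the commit), a minimal sketch of the ordering `point_light_order` produces: shadow-casting lights sort first, and the entity id is a stable tie-break, so the set kept after truncating to MAX_POINT_LIGHTS does not change from frame to frame. A plain integer stands in for `Entity` here.

```rust
use std::cmp::Ordering;

// Stand-in for `bevy_ecs::entity::Entity`, just for this example.
type Entity = u32;

fn point_light_order(
    (entity_1, shadows_enabled_1): (&Entity, &bool),
    (entity_2, shadows_enabled_2): (&Entity, &bool),
) -> Ordering {
    shadows_enabled_1
        .cmp(shadows_enabled_2)
        .reverse() // lights with shadows enabled sort before lights without
        .then_with(|| entity_1.cmp(entity_2)) // stable tie-break on entity id
}

fn main() {
    // (entity, shadows_enabled)
    let mut lights = vec![(3u32, false), (1, true), (2, false), (0, true)];
    lights.sort_by(|a, b| point_light_order((&a.0, &a.1), (&b.0, &b.1)));
    assert_eq!(lights, vec![(0, true), (1, true), (2, false), (3, false)]);
}
```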
### crates/bevy_pbr/src/render/light.rs (19 additions, 15 deletions)

```diff
@@ -1,7 +1,7 @@
 use crate::{
-    AmbientLight, Clusters, CubemapVisibleEntities, DirectionalLight, DirectionalLightShadowMap,
-    DrawMesh, MeshPipeline, NotShadowCaster, PointLight, PointLightShadowMap, SetMeshBindGroup,
-    VisiblePointLights, SHADOW_SHADER_HANDLE,
+    point_light_order, AmbientLight, Clusters, CubemapVisibleEntities, DirectionalLight,
+    DirectionalLightShadowMap, DrawMesh, MeshPipeline, NotShadowCaster, PointLight,
+    PointLightShadowMap, SetMeshBindGroup, VisiblePointLights, SHADOW_SHADER_HANDLE,
 };
 use bevy_asset::Handle;
 use bevy_core::FloatOrd;
@@ -574,11 +574,10 @@ pub fn prepare_lights(
     // Sort point lights with shadows enabled first, then by a stable key so that the index can be used
     // to render at most `MAX_POINT_LIGHT_SHADOW_MAPS` point light shadows.
     point_lights.sort_by(|(entity_1, light_1), (entity_2, light_2)| {
-        light_1
-            .shadows_enabled
-            .cmp(&light_2.shadows_enabled)
-            .reverse()
-            .then_with(|| entity_1.cmp(entity_2))
+        point_light_order(
+            (entity_1, &light_1.shadows_enabled),
+            (entity_2, &light_2.shadows_enabled),
+        )
     });
 
     if global_light_meta.entity_to_index.capacity() < point_lights.len() {
@@ -588,7 +587,7 @@
     }
 
     let mut gpu_point_lights = [GpuPointLight::default(); MAX_POINT_LIGHTS];
-    for (index, &(entity, light)) in point_lights.iter().enumerate().take(MAX_POINT_LIGHTS) {
+    for (index, &(entity, light)) in point_lights.iter().enumerate() {
         let mut flags = PointLightFlags::NONE;
         // Lights are sorted, shadow enabled lights are first
         if light.shadows_enabled && index < MAX_POINT_LIGHT_SHADOW_MAPS {
@@ -877,20 +876,25 @@ pub fn prepare_lights(
         .write_buffer(&render_device, &render_queue);
 }
 
-const CLUSTER_OFFSET_MASK: u32 = (1 << 24) - 1;
-const CLUSTER_COUNT_SIZE: u32 = 8;
-const CLUSTER_COUNT_MASK: u32 = (1 << 8) - 1;
+// this must match CLUSTER_COUNT_SIZE in pbr.wgsl
+// and must be large enough to contain MAX_POINT_LIGHTS
+const CLUSTER_COUNT_SIZE: u32 = 13;
+
+const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - CLUSTER_COUNT_SIZE)) - 1;
+const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1;
 const POINT_LIGHT_INDEX_MASK: u32 = (1 << 8) - 1;
 
 // NOTE: With uniform buffer max binding size as 16384 bytes
 // that means we can fit say 256 point lights in one uniform
 // buffer, which means the count can be at most 256 so it
-// needs 8 bits.
+// needs 9 bits.
 // The array of indices can also use u8 and that means the
 // offset in to the array of indices needs to be able to address
 // 16384 values. log2(16384) = 14 bits.
-// This means we can pack the offset into the upper 24 bits of a u32
-// and the count into the lower 8 bits.
+// We use 32 bits to store the pair, so we choose to divide the
+// remaining 9 bits proportionally to give some future room.
+// This means we can pack the offset into the upper 19 bits of a u32
+// and the count into the lower 13 bits.
 // NOTE: This assumes CPU and GPU endianness are the same which is true
 // for all common and tested x86/ARM CPUs and AMD/NVIDIA/Intel/Apple/etc GPUs
 fn pack_offset_and_count(offset: usize, count: usize) -> u32 {
```
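
The body of `pack_offset_and_count` falls outside the hunks shown above; purely as a sketch (the masking shown here is an assumption, not necessarily the committed implementation), this is the packing scheme the updated comments describe, with the count masked so it cannot spill into the offset bits:

```rust
// Offset in the upper 19 bits, count in the lower 13 bits of a single u32.
const CLUSTER_COUNT_SIZE: u32 = 13;
const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - CLUSTER_COUNT_SIZE)) - 1;
const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1;

fn pack_offset_and_count(offset: usize, count: usize) -> u32 {
    // Masking both fields keeps an out-of-range count from corrupting the offset.
    ((offset as u32 & CLUSTER_OFFSET_MASK) << CLUSTER_COUNT_SIZE)
        | (count as u32 & CLUSTER_COUNT_MASK)
}

fn unpack_offset_and_count(packed: u32) -> (u32, u32) {
    (packed >> CLUSTER_COUNT_SIZE, packed & CLUSTER_COUNT_MASK)
}

fn main() {
    let packed = pack_offset_and_count(1000, 256);
    // A count of 256 now survives the round trip instead of wrapping to 0.
    assert_eq!(unpack_offset_and_count(packed), (1000, 256));
}
```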
### crates/bevy_pbr/src/render/pbr.wgsl (4 additions, 2 deletions)

```diff
@@ -267,13 +267,15 @@ struct ClusterOffsetAndCount {
     count: u32;
 };
 
+// this must match CLUSTER_COUNT_SIZE in light.rs
+let CLUSTER_COUNT_SIZE = 13u;
 fn unpack_offset_and_count(cluster_index: u32) -> ClusterOffsetAndCount {
     let offset_and_count = cluster_offsets_and_counts.data[cluster_index >> 2u][cluster_index & ((1u << 2u) - 1u)];
     var output: ClusterOffsetAndCount;
     // The offset is stored in the upper 24 bits
-    output.offset = (offset_and_count >> 8u) & ((1u << 24u) - 1u);
+    output.offset = (offset_and_count >> CLUSTER_COUNT_SIZE) & ((1u << 32u - CLUSTER_COUNT_SIZE) - 1u);
     // The count is stored in the lower 8 bits
-    output.count = offset_and_count & ((1u << 8u) - 1u);
+    output.count = offset_and_count & ((1u << CLUSTER_COUNT_SIZE) - 1u);
     return output;
 }
 
```
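
Because `CLUSTER_COUNT_SIZE` is duplicated by hand between light.rs and pbr.wgsl, a small CPU-side sanity check (hypothetical, not part of the commit; the MAX_POINT_LIGHTS value is assumed from the comments) can catch the invariants drifting apart:

```rust
// Assumed values, taken from the comments in light.rs and pbr.wgsl.
const CLUSTER_COUNT_SIZE: u32 = 13; // must equal `let CLUSTER_COUNT_SIZE = 13u;` in pbr.wgsl
const CLUSTER_COUNT_MASK: u32 = (1 << CLUSTER_COUNT_SIZE) - 1;
const CLUSTER_OFFSET_MASK: u32 = (1 << (32 - CLUSTER_COUNT_SIZE)) - 1;
const MAX_POINT_LIGHTS: usize = 256; // assumed, per the uniform-buffer comment

fn main() {
    // The count field must be able to represent a cluster touched by every light.
    assert!(MAX_POINT_LIGHTS as u32 <= CLUSTER_COUNT_MASK);
    // The offset field must address the 16384-entry index list (needs 14 bits; 19 are available).
    assert!(CLUSTER_OFFSET_MASK as usize >= 16384 - 1);
    // The two fields must not overlap within the packed u32.
    assert_eq!((CLUSTER_OFFSET_MASK << CLUSTER_COUNT_SIZE) & CLUSTER_COUNT_MASK, 0);
    println!("cluster offset/count packing invariants hold");
}
```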
