From c895e49d51b1fc2c33d45798924fac1dca2ca2bb Mon Sep 17 00:00:00 2001 From: Ricky Taylor Date: Fri, 12 Jan 2024 01:05:34 +0000 Subject: [PATCH 1/3] Remove loop from queue_meshlet_mesh_upload --- crates/bevy_pbr/src/meshlet/gpu_scene.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index 239e4094f1a36..a8d625753a980 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -745,18 +745,16 @@ impl MeshletGpuScene { .previous_thread_id_starts .entry((instance, handle.id())) .or_insert((0, true)); + let previous_thread_ids = if previous_thread_id_start.1 { + 0..(meshlets_slice.len() as u32) + } else { + let start = previous_thread_id_start.0; + start..(meshlets_slice.len() as u32 + start) + }; - for (i, meshlet_index) in meshlets_slice.into_iter().enumerate() { - self.thread_instance_ids.get_mut().push(instance_index); - self.thread_meshlet_ids.get_mut().push(meshlet_index); - self.previous_thread_ids - .get_mut() - .push(if previous_thread_id_start.1 { - 0 - } else { - previous_thread_id_start.0 + i as u32 - }); - } + self.thread_instance_ids.get_mut().extend(std::iter::repeat(instance_index).take(meshlets_slice.len())); + self.thread_meshlet_ids.get_mut().extend(meshlets_slice); + self.previous_thread_ids.get_mut().extend(previous_thread_ids); *previous_thread_id_start = (current_thread_id_start, true); } From 20083fa5b23287d00a4be8b58d5fa1540e11e46f Mon Sep 17 00:00:00 2001 From: Ricky Taylor Date: Fri, 12 Jan 2024 01:44:49 +0000 Subject: [PATCH 2/3] Parallelise meshlet buffer writes --- crates/bevy_pbr/Cargo.toml | 1 + crates/bevy_pbr/src/meshlet/gpu_scene.rs | 45 ++++++++++++++++-------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index f0ede5e2c26ce..b1557b20d59df 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -26,6 +26,7 @@ bevy_reflect = { path = "../bevy_reflect", version = "0.12.0", features = [ "bevy", ] } bevy_render = { path = "../bevy_render", version = "0.12.0" } +bevy_tasks = { path = "../bevy_tasks", version = "0.12.0" } bevy_transform = { path = "../bevy_transform", version = "0.12.0" } bevy_utils = { path = "../bevy_utils", version = "0.12.0" } bevy_window = { path = "../bevy_window", version = "0.12.0" } diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index a8d625753a980..19f02bc797b3e 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -20,6 +20,7 @@ use bevy_render::{ MainWorld, }; use bevy_transform::components::GlobalTransform; +use bevy_tasks::ComputeTaskPool; use bevy_utils::{default, EntityHashMap, HashMap, HashSet}; use std::{ ops::{DerefMut, Range}, @@ -144,21 +145,35 @@ pub fn prepare_meshlet_per_frame_resources( return; } - gpu_scene - .instance_uniforms - .write_buffer(&render_device, &render_queue); - gpu_scene - .instance_material_ids - .write_buffer(&render_device, &render_queue); - gpu_scene - .thread_instance_ids - .write_buffer(&render_device, &render_queue); - gpu_scene - .thread_meshlet_ids - .write_buffer(&render_device, &render_queue); - gpu_scene - .previous_thread_ids - .write_buffer(&render_device, &render_queue); + let gpu_scene = gpu_scene.as_mut(); + ComputeTaskPool::get() + .scope(|scope| { + scope.spawn(async { + gpu_scene + .instance_uniforms + .write_buffer(&render_device, &render_queue) + }); + scope.spawn(async { + gpu_scene + .instance_material_ids + .write_buffer(&render_device, &render_queue) + }); + scope.spawn(async { + gpu_scene + .thread_instance_ids + .write_buffer(&render_device, &render_queue) + }); + scope.spawn(async { + gpu_scene + .thread_meshlet_ids + .write_buffer(&render_device, &render_queue) + }); + scope.spawn(async { + gpu_scene + .previous_thread_ids + .write_buffer(&render_device, &render_queue) + }); + }); let needed_buffer_size = 4 * gpu_scene.scene_index_count; let visibility_buffer_draw_index_buffer = From 2283864e9a0df16988cea8f24da7c2074e13c440 Mon Sep 17 00:00:00 2001 From: Ricky Taylor Date: Fri, 12 Jan 2024 02:18:57 +0000 Subject: [PATCH 3/3] Skip some copies during meshlet upload --- crates/bevy_pbr/Cargo.toml | 1 - crates/bevy_pbr/src/meshlet/gpu_scene.rs | 56 ++++++++++++------------ 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index b1557b20d59df..f0ede5e2c26ce 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -26,7 +26,6 @@ bevy_reflect = { path = "../bevy_reflect", version = "0.12.0", features = [ "bevy", ] } bevy_render = { path = "../bevy_render", version = "0.12.0" } -bevy_tasks = { path = "../bevy_tasks", version = "0.12.0" } bevy_transform = { path = "../bevy_transform", version = "0.12.0" } bevy_utils = { path = "../bevy_utils", version = "0.12.0" } bevy_window = { path = "../bevy_window", version = "0.12.0" } diff --git a/crates/bevy_pbr/src/meshlet/gpu_scene.rs b/crates/bevy_pbr/src/meshlet/gpu_scene.rs index 19f02bc797b3e..a125ba6246e78 100644 --- a/crates/bevy_pbr/src/meshlet/gpu_scene.rs +++ b/crates/bevy_pbr/src/meshlet/gpu_scene.rs @@ -20,12 +20,12 @@ use bevy_render::{ MainWorld, }; use bevy_transform::components::GlobalTransform; -use bevy_tasks::ComputeTaskPool; use bevy_utils::{default, EntityHashMap, HashMap, HashSet}; use std::{ ops::{DerefMut, Range}, sync::Arc, }; +use encase::internal::WriteInto; /// Create and queue for uploading to the GPU [`MeshUniform`] components for /// [`MeshletMesh`] entities, as well as queuing uploads for any new meshlet mesh @@ -129,6 +129,24 @@ pub fn queue_material_meshlet_meshes( } } +fn upload_storage_buffer( + buffer: &mut StorageBuffer>, + render_device: &RenderDevice, + render_queue: &RenderQueue, +) where Vec: WriteInto { + let inner = buffer.buffer(); + let capacity = inner.as_deref().map_or(0, |b| b.size()); + let size = buffer.get().size().get() as BufferAddress; + + if capacity >= size { + let inner = inner.unwrap(); + let bytes = bytemuck::cast_slice(buffer.get().as_slice()); + render_queue.write_buffer(inner, 0, bytes); + } else { + buffer.write_buffer(render_device, render_queue); + } +} + pub fn prepare_meshlet_per_frame_resources( mut gpu_scene: ResMut, views: Query<(Entity, &ExtractedView, AnyOf<(&Camera3d, &ShadowView)>)>, @@ -146,34 +164,14 @@ pub fn prepare_meshlet_per_frame_resources( } let gpu_scene = gpu_scene.as_mut(); - ComputeTaskPool::get() - .scope(|scope| { - scope.spawn(async { - gpu_scene - .instance_uniforms - .write_buffer(&render_device, &render_queue) - }); - scope.spawn(async { - gpu_scene - .instance_material_ids - .write_buffer(&render_device, &render_queue) - }); - scope.spawn(async { - gpu_scene - .thread_instance_ids - .write_buffer(&render_device, &render_queue) - }); - scope.spawn(async { - gpu_scene - .thread_meshlet_ids - .write_buffer(&render_device, &render_queue) - }); - scope.spawn(async { - gpu_scene - .previous_thread_ids - .write_buffer(&render_device, &render_queue) - }); - }); + + gpu_scene + .instance_uniforms + .write_buffer(&render_device, &render_queue); + upload_storage_buffer(&mut gpu_scene.instance_material_ids, &*render_device, &*render_queue); + upload_storage_buffer(&mut gpu_scene.thread_instance_ids, &*render_device, &*render_queue); + upload_storage_buffer(&mut gpu_scene.thread_meshlet_ids, &*render_device, &*render_queue); + upload_storage_buffer(&mut gpu_scene.previous_thread_ids, &*render_device, &*render_queue); let needed_buffer_size = 4 * gpu_scene.scene_index_count; let visibility_buffer_draw_index_buffer =