From c8b616d5e9b4e7100c93b4228d5bd5f5e09f4684 Mon Sep 17 00:00:00 2001 From: Jerome Humbert Date: Thu, 31 Oct 2024 13:39:08 +0900 Subject: [PATCH] Add `wgpu` as core dependency (#395) Move `wgpu` from a dev-dependency to a core crate dependency, to allow the direct use of its types. Clean-up various documentations, and other minor code changes. --- Cargo.toml | 4 +- src/asset.rs | 71 ++++++++++++- src/plugin.rs | 20 ++-- src/render/aligned_buffer_vec.rs | 35 +++++-- src/render/mod.rs | 168 ++++++++++++++++--------------- src/render/vfx_common.wgsl | 26 +++++ src/spawn.rs | 7 ++ 7 files changed, 232 insertions(+), 99 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3eb08be0..fe6db485 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ bitflags = "2.3" typetag = { version = "0.2", optional = true } thiserror = "1.0" # Same versions as Bevy 0.14 (bevy_render) +wgpu = "0.20" naga = "0.20" naga_oil = { version = "0.14", default-features = false, features = ["test_shader"] } @@ -67,9 +68,6 @@ features = [ "bevy_core_pipeline", "bevy_render", "bevy_asset", "x11" ] all-features = true [dev-dependencies] -# Same versions as Bevy 0.14 (bevy_render) -wgpu = "0.20" - # For world inspector; required if "examples_world_inspector" is used. bevy-inspector-egui = "0.25" bevy_egui = { version = "0.28", default-features = false, features = [ diff --git a/src/asset.rs b/src/asset.rs index 7b5ce5e2..90621171 100644 --- a/src/asset.rs +++ b/src/asset.rs @@ -10,6 +10,7 @@ use bevy::{ use serde::{Deserialize, Serialize}; #[cfg(feature = "serde")] use thiserror::Error; +use wgpu::{BlendComponent, BlendFactor, BlendOperation, BlendState}; use crate::{ modifier::{Modifier, RenderModifier}, @@ -108,7 +109,7 @@ pub enum SimulationCondition { /// rendered during the [`Transparent2d`] render phase. /// /// [`Transparent2d`]: bevy::core_pipeline::core_2d::Transparent2d -#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Reflect, Serialize, Deserialize, Hash)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, Reflect, Serialize, Deserialize)] #[non_exhaustive] pub enum AlphaMode { /// Render the effect with alpha blending. @@ -199,6 +200,36 @@ pub enum AlphaMode { Mask(ExprHandle), } +impl From for BlendState { + fn from(value: AlphaMode) -> Self { + match value { + AlphaMode::Blend => BlendState::ALPHA_BLENDING, + AlphaMode::Premultiply => BlendState::PREMULTIPLIED_ALPHA_BLENDING, + AlphaMode::Add => BlendState { + color: BlendComponent { + src_factor: BlendFactor::SrcAlpha, + dst_factor: BlendFactor::One, + operation: BlendOperation::Add, + }, + alpha: BlendComponent { + src_factor: BlendFactor::Zero, + dst_factor: BlendFactor::One, + operation: BlendOperation::Add, + }, + }, + AlphaMode::Multiply => BlendState { + color: BlendComponent { + src_factor: BlendFactor::Dst, + dst_factor: BlendFactor::OneMinusSrcAlpha, + operation: BlendOperation::Add, + }, + alpha: BlendComponent::OVER, + }, + _ => BlendState::ALPHA_BLENDING, + } + } +} + /// Asset describing a visual effect. /// /// The effect can be instanciated with a [`ParticleEffect`] component, or a @@ -824,7 +855,7 @@ impl EffectAsset { /// Build the particle layout of the asset based on its modifiers. /// /// This method calculates the particle layout of the effect based on the - /// currently existing particles, and return it as a newly allocated + /// currently existing modifiers, and return it as a newly allocated /// [`ParticleLayout`] object. pub fn particle_layout(&self) -> ParticleLayout { // Build the set of unique attributes required for all modifiers @@ -1166,4 +1197,40 @@ mod tests { effect_serde.render_modifiers().count() ); } + + #[test] + fn alpha_mode_blend_state() { + assert_eq!(BlendState::ALPHA_BLENDING, AlphaMode::Blend.into()); + assert_eq!( + BlendState::PREMULTIPLIED_ALPHA_BLENDING, + AlphaMode::Premultiply.into() + ); + + let blend_state = BlendState { + color: BlendComponent { + src_factor: BlendFactor::SrcAlpha, + dst_factor: BlendFactor::One, + operation: BlendOperation::Add, + }, + alpha: BlendComponent { + src_factor: BlendFactor::Zero, + dst_factor: BlendFactor::One, + operation: BlendOperation::Add, + }, + }; + assert_eq!(blend_state, AlphaMode::Add.into()); + + let blend_state = BlendState { + color: BlendComponent { + src_factor: BlendFactor::Dst, + dst_factor: BlendFactor::OneMinusSrcAlpha, + operation: BlendOperation::Add, + }, + alpha: BlendComponent::OVER, + }; + assert_eq!(blend_state, AlphaMode::Multiply.into()); + + let expr = Module::default().lit(0.5); + assert_eq!(BlendState::ALPHA_BLENDING, AlphaMode::Mask(expr).into()); + } } diff --git a/src/plugin.rs b/src/plugin.rs index 73be958d..1df336be 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -23,11 +23,11 @@ use crate::{ properties::EffectProperties, render::{ extract_effect_events, extract_effects, prepare_bind_groups, prepare_effects, - prepare_resources, queue_effects, DispatchIndirectPipeline, DrawEffects, EffectAssetEvents, - EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuDispatchIndirect, - GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect, GpuSpawnerParams, - ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline, ShaderCache, - SimParams, StorageType as _, VfxSimulateDriverNode, VfxSimulateNode, + prepare_gpu_resources, queue_effects, DispatchIndirectPipeline, DrawEffects, + EffectAssetEvents, EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, + GpuDispatchIndirect, GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect, + GpuSpawnerParams, ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline, + ShaderCache, SimParams, StorageType as _, VfxSimulateDriverNode, VfxSimulateNode, }, spawn::{self, Random}, tick_initializers, @@ -76,16 +76,24 @@ pub enum EffectSystems { GatherRemovedEffects, /// Prepare effect assets for the extracted effects. + /// + /// Part of Bevy's own [`RenderSet::PrepareAssets`]. PrepareEffectAssets, /// Queue the GPU commands for the extracted effects. + /// + /// Part of Bevy's own [`RenderSet::Queue`]. QueueEffects, /// Prepare GPU data for the queued effects. + /// + /// Part of Bevy's own [`RenderSet::Prepare`]. PrepareEffectGpuResources, /// Prepare the GPU bind groups once all buffers have been (re-)allocated /// and won't change this frame. + /// + /// Part of Bevy's own [`RenderSet::PrepareBindGroups`]. PrepareBindGroups, } @@ -299,7 +307,7 @@ impl Plugin for HanabiPlugin { queue_effects .in_set(EffectSystems::QueueEffects) .after(prepare_effects), - prepare_resources + prepare_gpu_resources .in_set(EffectSystems::PrepareEffectGpuResources) .after(prepare_view_uniforms), prepare_bind_groups diff --git a/src/render/aligned_buffer_vec.rs b/src/render/aligned_buffer_vec.rs index 2b584b2a..c9d00bde 100644 --- a/src/render/aligned_buffer_vec.rs +++ b/src/render/aligned_buffer_vec.rs @@ -14,12 +14,22 @@ use copyless::VecHelper; use crate::next_multiple_of; -/// Like Bevy's [`BufferVec`], but with correct item alignment. +/// Like Bevy's [`BufferVec`], but with extra per-item alignment. /// -/// This is a helper to ensure the data is properly aligned when copied to GPU, -/// depending on the device constraints and the WGSL rules. Generally the -/// alignment is one of the [`WgpuLimits`], and is also ensured to be -/// compatible with WGSL. +/// This helper ensures the individual array elements are properly aligned, +/// depending on the device constraints and the WGSL rules. In general using +/// [`BufferVec`] is enough to ensure alignment; however when some array items +/// also need to be bound individually, then each item (not only the array +/// itself) needs to be aligned to the device requirements. This is admittedly a +/// very specific case, because the device alignment might be very large (256 +/// bytes) and this causes a lot of wasted space (padding per-element, instead +/// of padding for the entire array). +/// +/// For this buffer to work correctly and items be bindable individually, the +/// alignment must come from one of the [`WgpuLimits`]. For example for a +/// storage buffer, to be able to bind the entire buffer but also any subset of +/// it (including individual elements), the extra alignment must +/// be [`WgpuLimits::min_storage_buffer_offset_alignment`]. /// /// The element type `T` needs to implement the following traits: /// - [`Pod`] to allow copy. @@ -48,7 +58,7 @@ pub struct AlignedBufferVec { label: Option, } -impl Default for AlignedBufferVec { +impl Default for AlignedBufferVec { fn default() -> Self { let item_size = std::mem::size_of::(); let aligned_size = ::SHADER_SIZE.get() as usize; @@ -65,7 +75,7 @@ impl Default for AlignedBufferVec { } } -impl AlignedBufferVec { +impl AlignedBufferVec { /// Create a new collection. /// /// `item_align` is an optional additional alignment for items in the @@ -143,6 +153,7 @@ impl AlignedBufferVec { self.values.is_empty() } + /// Append a value to the buffer. pub fn push(&mut self, value: T) -> usize { let index = self.values.len(); self.values.alloc().init(value); @@ -151,10 +162,16 @@ impl AlignedBufferVec { /// Reserve some capacity into the buffer. /// + /// If the buffer is reallocated, the old content (on the GPU) is lost, and + /// needs to be re-uploaded to the newly-created buffer. This is done with + /// [`write_buffer()`]. + /// /// # Returns /// /// `true` if the buffer was (re)allocated, or `false` if an existing buffer /// was reused which already had enough capacity. + /// + /// [`write_buffer()`]: AlignedBufferVec::write_buffer pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) -> bool { if capacity > self.capacity { let size = self.aligned_size * capacity; @@ -217,7 +234,7 @@ impl AlignedBufferVec { } } -impl std::ops::Index for AlignedBufferVec { +impl std::ops::Index for AlignedBufferVec { type Output = T; fn index(&self, index: usize) -> &Self::Output { @@ -225,7 +242,7 @@ impl std::ops::Index for AlignedBufferV } } -impl std::ops::IndexMut for AlignedBufferVec { +impl std::ops::IndexMut for AlignedBufferVec { fn index_mut(&mut self, index: usize) -> &mut Self::Output { &mut self.values[index] } diff --git a/src/render/mod.rs b/src/render/mod.rs index 0b38b474..bf643fab 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,6 +1,7 @@ use std::{ borrow::Cow, num::{NonZero, NonZeroU32, NonZeroU64}, + ops::Deref, }; use std::{iter, marker::PhantomData}; @@ -148,7 +149,14 @@ impl Default for GpuSimParams { } impl From for GpuSimParams { + #[inline] fn from(src: SimParams) -> Self { + Self::from(&src) + } +} + +impl From<&SimParams> for GpuSimParams { + fn from(src: &SimParams) -> Self { Self { delta_time: src.delta_time, time: src.time as f32, @@ -217,7 +225,10 @@ impl GpuCompressedTransform { /// Extension trait for shader types stored in a WGSL storage buffer. pub(crate) trait StorageType { - /// Get the aligned size of this type based on the given alignment in bytes. + /// Get the aligned size, in bytes, of this type such that it aligns to the + /// given alignment, in bytes. + /// + /// This is mainly used to align GPU types to device requirements. fn aligned_size(alignment: u32) -> NonZeroU64; /// Get the WGSL padding code to append to the GPU struct to align it. @@ -237,9 +248,10 @@ impl StorageType for T { fn padding_code(alignment: u32) -> String { let aligned_size = T::aligned_size(alignment); trace!( - "Aligning {} to {} bytes as device limits requires. Aligned size: {} bytes.", - stringify!(T), + "Aligning {} to {} bytes as device limits requires. Orignal size: {} bytes. Aligned size: {} bytes.", + std::any::type_name::(), alignment, + T::min_size().get(), aligned_size ); @@ -284,7 +296,6 @@ pub(crate) struct GpuSpawnerParams { pad: [u32; 3], } -// FIXME - min_storage_buffer_offset_alignment #[repr(C)] #[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)] pub struct GpuDispatchIndirect { @@ -345,8 +356,8 @@ pub struct GpuParticleGroup { pub indirect_index: u32, /// The capacity of this group in number of particles. pub capacity: u32, - // The index of the first particle in this effect in the particle and - // indirect buffers. + /// The index of the first particle in this effect in the particle and + /// indirect buffers. pub effect_particle_offset: u32, } @@ -542,7 +553,7 @@ pub(crate) struct ParticlesInitPipeline { render_indirect_layout: BindGroupLayout, } -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(crate) struct ParticleInitPipelineKey { shader: Handle, particle_layout_min_binding_size: NonZero, @@ -551,7 +562,7 @@ pub(crate) struct ParticleInitPipelineKey { } bitflags! { - #[derive(Clone, Copy, PartialEq, Eq, Hash)] + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct ParticleInitPipelineKeyFlags: u8 { const CLONE = 0x1; const ATTRIBUTE_PREV = 0x2; @@ -746,7 +757,7 @@ impl FromWorld for ParticlesUpdatePipeline { } } -#[derive(Default, Clone, Hash, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Hash, PartialEq, Eq)] pub(crate) struct ParticleUpdatePipelineKey { /// Compute shader, with snippets applied, but not preprocessed yet. shader: Handle, @@ -1156,32 +1167,6 @@ impl SpecializedRenderPipeline for ParticlesRenderPipeline { TextureFormat::bevy_default() }; - let blend_state = match key.alpha_mode { - AlphaMode::Blend => BlendState::ALPHA_BLENDING, - AlphaMode::Premultiply => BlendState::PREMULTIPLIED_ALPHA_BLENDING, - AlphaMode::Add => BlendState { - color: BlendComponent { - src_factor: BlendFactor::SrcAlpha, - dst_factor: BlendFactor::One, - operation: BlendOperation::Add, - }, - alpha: BlendComponent { - src_factor: BlendFactor::Zero, - dst_factor: BlendFactor::One, - operation: BlendOperation::Add, - }, - }, - AlphaMode::Multiply => BlendState { - color: BlendComponent { - src_factor: BlendFactor::Dst, - dst_factor: BlendFactor::OneMinusSrcAlpha, - operation: BlendOperation::Add, - }, - alpha: BlendComponent::OVER, - }, - _ => BlendState::ALPHA_BLENDING, - }; - RenderPipelineDescriptor { vertex: VertexState { shader: key.shader.clone(), @@ -1195,7 +1180,7 @@ impl SpecializedRenderPipeline for ParticlesRenderPipeline { entry_point: "fragment".into(), targets: vec![Some(ColorTargetState { format, - blend: Some(blend_state), + blend: Some(key.alpha_mode.into()), write_mask: ColorWrites::ALL, })], }), @@ -1397,7 +1382,7 @@ pub(crate) fn extract_effects( acc }); trace!( - "Found {} removed entities.", + "Found {} removed effect(s).", extracted_effects.removed_effect_entities.len() ); @@ -1405,9 +1390,9 @@ pub(crate) fn extract_effects( extracted_effects.added_effects = query .p1() .iter() - .filter_map(|(entity, effect)| { - let handle = effect.asset.clone_weak(); - let asset = effects.get(&effect.asset)?; + .filter_map(|(entity, compiled_effect)| { + let handle = compiled_effect.asset.clone_weak(); + let asset = effects.get(&compiled_effect.asset)?; let particle_layout = asset.particle_layout(); assert!( particle_layout.size() > 0, @@ -1425,7 +1410,7 @@ pub(crate) fn extract_effects( asset.capacities(), particle_layout, property_layout, - effect.layout_flags); + compiled_effect.layout_flags); Some(AddedEffect { entity, @@ -1441,13 +1426,13 @@ pub(crate) fn extract_effects( particle_layout, property_layout, group_order, - layout_flags: effect.layout_flags, + layout_flags: compiled_effect.layout_flags, handle, }) }) .collect(); - // Loop over all existing effects to update them + // Loop over all existing effects to extract them extracted_effects.effects.clear(); for ( entity, @@ -1460,7 +1445,7 @@ pub(crate) fn extract_effects( ) in query.p0().iter_mut() { // Check if shaders are configured - let effect_shaders = effect.get_configured_shaders().to_vec(); + let effect_shaders = effect.get_configured_shaders(); if effect_shaders.is_empty() { continue; } @@ -1532,7 +1517,7 @@ pub(crate) fn extract_effects( texture_layout, textures: effect.textures.clone(), alpha_mode, - effect_shaders, + effect_shaders: effect_shaders.to_vec(), #[cfg(feature = "2d")] z_sort_key_2d, }, @@ -1557,17 +1542,29 @@ struct GpuLimits { /// /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment storage_buffer_align: NonZeroU32, + /// Size of [`GpuDispatchIndirect`] aligned to the contraint of /// [`WgpuLimits::min_storage_buffer_offset_alignment`]. /// /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment dispatch_indirect_aligned_size: NonZeroU32, + + /// Size of [`GpuRenderEffectMetadata`] aligned to the contraint of + /// [`WgpuLimits::min_storage_buffer_offset_alignment`]. + /// + /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment render_effect_indirect_aligned_size: NonZeroU32, - /// Size of [`GpuRenderIndirect`] aligned to the contraint of + + /// Size of [`GpuRenderGroupIndirect`] aligned to the contraint of /// [`WgpuLimits::min_storage_buffer_offset_alignment`]. /// /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment render_group_indirect_aligned_size: NonZeroU32, + + /// Size of [`GpuParticleGroup`] aligned to the contraint of + /// [`WgpuLimits::min_storage_buffer_offset_alignment`]. + /// + /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment particle_group_aligned_size: NonZeroU32, } @@ -1627,23 +1624,27 @@ impl GpuLimits { self.dispatch_indirect_aligned_size.get() * buffer_index } - /// Byte alignment for [`GpuRenderEffectMetadata`]. + /// Byte offset of the [`GpuRenderEffectMetadata`] of a given buffer. pub fn render_effect_indirect_offset(&self, buffer_index: u32) -> u64 { self.render_effect_indirect_aligned_size.get() as u64 * buffer_index as u64 } + + /// Byte alignment for [`GpuRenderEffectMetadata`]. pub fn render_effect_indirect_size(&self) -> NonZeroU64 { NonZeroU64::new(self.render_effect_indirect_aligned_size.get() as u64).unwrap() } - /// Byte alignment for [`GpuRenderGroupIndirect`]. + /// Byte offset for the [`GpuRenderGroupIndirect`] of a given buffer. pub fn render_group_indirect_offset(&self, buffer_index: u32) -> u64 { self.render_group_indirect_aligned_size.get() as u64 * buffer_index as u64 } + + /// Byte alignment for [`GpuRenderGroupIndirect`]. pub fn render_group_indirect_size(&self) -> NonZeroU64 { NonZeroU64::new(self.render_group_indirect_aligned_size.get() as u64).unwrap() } - /// Byte alignment for [`GpuParticleGroup`]. + /// Byte offset for the [`GpuParticleGroup`] of a given buffer. pub fn particle_group_offset(&self, buffer_index: u32) -> u32 { self.particle_group_aligned_size.get() * buffer_index } @@ -1662,8 +1663,9 @@ struct CacheEntry { /// effects. #[derive(Resource)] pub struct EffectsMeta { - /// Map from an entity with a [`ParticleEffect`] component attached to it, - /// to the associated effect slice allocated in the [`EffectCache`]. + /// Map from an entity of the main world with a [`ParticleEffect`] component + /// attached to it, to the associated effect slice allocated in the + /// [`EffectCache`]. /// /// [`ParticleEffect`]: crate::ParticleEffect entity_map: HashMap, @@ -1682,12 +1684,17 @@ pub struct EffectsMeta { /// Bind group #3 of the vfx_init shader, containing the indirect render /// buffer. init_render_indirect_bind_group: Option, - + /// Global shared GPU uniform buffer storing the simulation parameters, + /// uploaded each frame from CPU to GPU. sim_params_uniforms: UniformBuffer, + /// Global shared GPU buffer storing the various spawner parameter structs + /// for the active effect instances. spawner_buffer: AlignedBufferVec, + /// Global shared GPU buffer storing the various indirect dispatch structs + /// for the indirect dispatch of the Update pass. dispatch_indirect_buffer: BufferTable, - /// Stores the GPU `RenderEffectMetadata` structures, which describe mutable - /// data relating to the entire effect. + /// Global shared GPU buffer storing the various `RenderEffectMetadata` + /// structs for the active effect instances. render_effect_dispatch_buffer: BufferTable, /// Stores the GPU `RenderGroupIndirect` structures, which describe mutable /// data specific to a particle group. @@ -1892,6 +1899,8 @@ impl EffectsMeta { trail_dispatch_buffer_indices, }; + // Insert the effect into the cache. This will allocate all the necessary GPU + // resources as needed. let cache_id = effect_cache.insert( added_effect.handle, added_effect @@ -2006,8 +2015,6 @@ pub(crate) fn prepare_effects( update_pipeline: Res, mut specialized_init_pipelines: ResMut>, mut specialized_update_pipelines: ResMut>, - // update_pipeline: Res, // TODO move update_pipeline.pipeline to - // EffectsMeta mut effects_meta: ResMut, mut effect_cache: ResMut, mut extracted_effects: ResMut, @@ -2103,8 +2110,8 @@ pub(crate) fn prepare_effects( // information, and the proper ordering implementation. // effect_entity_list.sort_by_key(|a| a.effect_slice.clone()); - // Loop on all extracted effects in order and try to batch them together to - // reduce draw calls + // Loop on all extracted effects in order, and try to batch them together to + // reduce draw calls. effects_meta.spawner_buffer.clear(); effects_meta.particle_group_buffer.clear(); let mut total_group_count = 0; @@ -2346,12 +2353,9 @@ pub(crate) fn prepare_effects( // Update simulation parameters effects_meta .sim_params_uniforms - .set(GpuSimParams::default()); + .set(sim_params.deref().into()); { let gpu_sim_params = effects_meta.sim_params_uniforms.get_mut(); - let sim_params = *sim_params; - *gpu_sim_params = sim_params.into(); - gpu_sim_params.num_groups = total_group_count; trace!( @@ -2366,7 +2370,6 @@ pub(crate) fn prepare_effects( gpu_sim_params.num_groups, ); } - // FIXME - There's no simple way to tell if write_buffer() reallocates... let prev_buffer_id = effects_meta.sim_params_uniforms.buffer().map(|b| b.id()); effects_meta .sim_params_uniforms @@ -2377,7 +2380,11 @@ pub(crate) fn prepare_effects( } } -/// The per-buffer bind group for the GPU particle buffer. +/// Per-buffer bind groups for a GPU effect buffer. +/// +/// This contains all bind groups specific to a single [`EffectBuffer`]. +/// +/// [`EffectBuffer`]: crate::render::effect_cache::EffectBuffer pub(crate) struct BufferBindGroups { /// Bind group for the render graphic shader. /// @@ -2409,14 +2416,15 @@ impl Material { .iter() .enumerate() .flat_map(|(index, id)| { + let base_binding = index as u32 * 2; if let Some(gpu_image) = gpu_images.get(*id) { vec![ BindGroupEntry { - binding: index as u32 * 2, + binding: base_binding, resource: BindingResource::TextureView(&gpu_image.texture_view), }, BindGroupEntry { - binding: index as u32 * 2 + 1, + binding: base_binding + 1, resource: BindingResource::Sampler(&gpu_image.sampler), }, ] @@ -2982,17 +2990,18 @@ pub(crate) fn queue_effects( /// Prepare GPU resources for effect rendering. /// -/// This system runs in the [`Prepare`] render set, after Bevy has updated the -/// [`ViewUniforms`], which need to be referenced to get access to the current -/// camera view. -pub(crate) fn prepare_resources( +/// This system runs in the [`RenderSet::Prepare`] render set, after Bevy has +/// updated the [`ViewUniforms`], which need to be referenced to get access to +/// the current camera view. +pub(crate) fn prepare_gpu_resources( mut effects_meta: ResMut, render_device: Res, view_uniforms: Res, render_pipeline: Res, ) { // Get the binding for the ViewUniform, the uniform data structure containing - // the Camera data for the current view. + // the Camera data for the current view. If not available, we cannot render + // anything. let Some(view_binding) = view_uniforms.uniforms.binding() else { return; }; @@ -3198,8 +3207,8 @@ pub(crate) fn prepare_bind_groups( return; }; - // Create the per-effect render bind groups - trace!("Create per-effect render bind groups..."); + // Create the per-buffer bind groups + trace!("Create per-buffer bind groups..."); for (buffer_index, buffer) in effect_cache.buffers().iter().enumerate() { #[cfg(feature = "trace")] let _span_buffer = bevy::utils::tracing::info_span!("create_buffer_bind_groups").entered(); @@ -3212,9 +3221,9 @@ pub(crate) fn prepare_bind_groups( continue; }; - // Ensure all effect groups have a bind group for the entire buffer of the - // group, since the update phase runs on an entire group/buffer at once, - // with all the effect instances in it batched together. + // Ensure all effects in this batch have a bind group for the entire buffer of + // the group, since the update phase runs on an entire group/buffer at + // once, with all the effect instances in it batched together. trace!("effect particle buffer_index=#{}", buffer_index); effect_bind_groups .particle_buffers @@ -3260,7 +3269,7 @@ pub(crate) fn prepare_bind_groups( } trace!("Creating render bind group with {} entries (layout flags: {:?})", entries.len(), buffer.layout_flags()); let render = render_device.create_bind_group( - &format!("hanabi:bind_group_render_vfx{buffer_index}_particles")[..], + &format!("hanabi:bind_group:render_vfx{buffer_index}_particles")[..], buffer.particle_layout_bind_group_with_dispatch(), &entries, ); @@ -3886,7 +3895,7 @@ impl Node for VfxSimulateNode { // let mut total_group_count = 0; { { - trace!("loop over effect batches..."); + trace!("init: loop over effect batches..."); // Dispatch init compute jobs for (entity, batches) in self.effect_query.iter_manual(world) { @@ -4165,7 +4174,8 @@ impl Node for VfxSimulateNode { && effects_meta.dr_indirect_bind_group.is_some() && effects_meta.sim_params_bind_group.is_some() { - // Only if there's an effect + // Only start a compute pass if there's an effect; makes things clearer in + // debugger. let mut compute_pass = render_context .command_encoder() diff --git a/src/render/vfx_common.wgsl b/src/render/vfx_common.wgsl index 37357dc0..c4bb56e4 100644 --- a/src/render/vfx_common.wgsl +++ b/src/render/vfx_common.wgsl @@ -18,9 +18,16 @@ struct SimParams { } struct Spawner { + // Compressed transform of the emitter. transform: mat3x4, // transposed (row-major) + /// Inverse compressed transform of the emitter. inverse_transform: mat3x4, // transposed (row-major) + /// Number of particles to spawn this frame, as calculated by the CPU Spawner. + /// + /// This is only used if the effect is not a child effect (driven by GPU events). spawn: i32, + /// PRNG seed for this effect instance. Currently this can change each time the + /// effect is recompiled, and cannot be set deterministically (TODO). seed: u32, // Can't use storage with atomics #ifdef SPAWNER_READONLY @@ -28,6 +35,10 @@ struct Spawner { #else count: atomic, #endif + /// Global index of the effect in the various shared buffers. + /// + /// This is a globally unique index for all active effect instances, used to index + /// global buffers like the spawner buffer or the render indirect dispatch buffer. effect_index: u32, // The lifetime to initialize particles with. This is only used for cloners // (i.e. trails or ribbons). @@ -70,8 +81,11 @@ const DI_OFFSET_PONG: u32 = 3u; /// Dispatch indirect parameters for GPU driven update compute. struct DispatchIndirect { + /// Number of workgroups. This is derived from the number of particles to update. x: u32, + /// Unused; always 1. y: u32, + /// Unused; always 1. z: u32, /// Index of the ping-pong buffer of particle indices to read particles from /// during rendering. Cached from RenderIndirect::ping after it's swapped @@ -79,6 +93,10 @@ struct DispatchIndirect { /// as an indirect draw source so cannot also be bound as regular storage /// buffer for reading. pong: u32, + /// Padding for storage buffer alignment. This struct is sometimes bound as part + /// of an array, or sometimes individually as a single unit. In the later case, + /// we need it to be aligned to the GPU limits of the device. That limit is only + /// known at runtime when initializing the WebGPU device. {{DISPATCH_INDIRECT_PADDING}} } @@ -127,7 +145,15 @@ struct RenderGroupIndirect { /// Number of dead particles, decremented during the init pass as new particles /// are spawned, and incremented during the update pass as existing particles die. dead_count: atomic, + /// Maxmimum number of init threads to run on next frame. This is cached from + /// `dead_count` during the indirect dispatch of the previous frame, so that the + /// init compute pass can cap its thread count while also decrementing the actual + /// `dead_count` as particles are spawned. max_spawn: atomic, + /// Padding for storage buffer alignment. This struct is sometimes bound as part + /// of an array, or sometimes individually as a single unit. In the later case, + /// we need it to be aligned to the GPU limits of the device. That limit is only + /// known at runtime when initializing the WebGPU device. {{RENDER_GROUP_INDIRECT_PADDING}} } diff --git a/src/spawn.rs b/src/spawn.rs index ac18d933..10b52d70 100644 --- a/src/spawn.rs +++ b/src/spawn.rs @@ -1085,6 +1085,7 @@ mod test { fn test_once() { let rng = &mut new_rng(); let spawner = Spawner::once(5.0.into(), true); + assert!(spawner.is_once()); let mut spawner = make_effect_spawner(spawner); let count = spawner.tick(0.001, rng); assert_eq!(count, 5); @@ -1096,6 +1097,7 @@ mod test { fn test_once_reset() { let rng = &mut new_rng(); let spawner = Spawner::once(5.0.into(), true); + assert!(spawner.is_once()); let mut spawner = make_effect_spawner(spawner); spawner.tick(1.0, rng); spawner.reset(); @@ -1107,6 +1109,7 @@ mod test { fn test_once_not_immediate() { let rng = &mut new_rng(); let spawner = Spawner::once(5.0.into(), false); + assert!(spawner.is_once()); let mut spawner = make_effect_spawner(spawner); let count = spawner.tick(1.0, rng); assert_eq!(count, 0); @@ -1119,6 +1122,7 @@ mod test { fn test_rate() { let rng = &mut new_rng(); let spawner = Spawner::rate(5.0.into()); + assert!(!spawner.is_once()); let mut spawner = make_effect_spawner(spawner); // Slightly over 1.0 to avoid edge case let count = spawner.tick(1.01, rng); @@ -1131,6 +1135,7 @@ mod test { fn test_rate_active() { let rng = &mut new_rng(); let spawner = Spawner::rate(5.0.into()); + assert!(!spawner.is_once()); let mut spawner = make_effect_spawner(spawner); spawner.tick(1.01, rng); spawner.set_active(false); @@ -1147,6 +1152,7 @@ mod test { fn test_rate_accumulate() { let rng = &mut new_rng(); let spawner = Spawner::rate(5.0.into()); + assert!(!spawner.is_once()); let mut spawner = make_effect_spawner(spawner); // 13 ticks instead of 12 to avoid edge case let count = (0..13).map(|_| spawner.tick(1.0 / 60.0, rng)).sum::(); @@ -1157,6 +1163,7 @@ mod test { fn test_burst() { let rng = &mut new_rng(); let spawner = Spawner::burst(5.0.into(), 2.0.into()); + assert!(!spawner.is_once()); let mut spawner = make_effect_spawner(spawner); let count = spawner.tick(1.0, rng); assert_eq!(count, 5);