From c8b616d5e9b4e7100c93b4228d5bd5f5e09f4684 Mon Sep 17 00:00:00 2001
From: Jerome Humbert <djeedai@gmail.com>
Date: Thu, 31 Oct 2024 13:39:08 +0900
Subject: [PATCH] Add `wgpu` as core dependency (#395)

Move `wgpu` from a dev-dependency to a core crate dependency, to allow the
direct use of its types.

Clean up various documentation, and make other minor code changes.
---
 Cargo.toml                       |   4 +-
 src/asset.rs                     |  71 ++++++++++++-
 src/plugin.rs                    |  20 ++--
 src/render/aligned_buffer_vec.rs |  35 +++++--
 src/render/mod.rs                | 168 ++++++++++++++++---------------
 src/render/vfx_common.wgsl       |  26 +++++
 src/spawn.rs                     |   7 ++
 7 files changed, 232 insertions(+), 99 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 3eb08be0..fe6db485 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -55,6 +55,7 @@ bitflags = "2.3"
 typetag = { version = "0.2", optional = true }
 thiserror = "1.0"
 # Same versions as Bevy 0.14 (bevy_render)
+wgpu = "0.20"
 naga = "0.20"
 naga_oil = { version = "0.14", default-features = false, features = ["test_shader"] }
 
@@ -67,9 +68,6 @@ features = [ "bevy_core_pipeline", "bevy_render", "bevy_asset", "x11" ]
 all-features = true
 
 [dev-dependencies]
-# Same versions as Bevy 0.14 (bevy_render)
-wgpu = "0.20"
-
 # For world inspector; required if "examples_world_inspector" is used.
 bevy-inspector-egui = "0.25"
 bevy_egui = { version = "0.28", default-features = false, features = [
diff --git a/src/asset.rs b/src/asset.rs
index 7b5ce5e2..90621171 100644
--- a/src/asset.rs
+++ b/src/asset.rs
@@ -10,6 +10,7 @@ use bevy::{
 use serde::{Deserialize, Serialize};
 #[cfg(feature = "serde")]
 use thiserror::Error;
+use wgpu::{BlendComponent, BlendFactor, BlendOperation, BlendState};
 
 use crate::{
     modifier::{Modifier, RenderModifier},
@@ -108,7 +109,7 @@ pub enum SimulationCondition {
 /// rendered during the [`Transparent2d`] render phase.
 ///
 /// [`Transparent2d`]: bevy::core_pipeline::core_2d::Transparent2d
-#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Reflect, Serialize, Deserialize, Hash)]
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, Reflect, Serialize, Deserialize)]
 #[non_exhaustive]
 pub enum AlphaMode {
     /// Render the effect with alpha blending.
@@ -199,6 +200,36 @@ pub enum AlphaMode {
     Mask(ExprHandle),
 }
 
+impl From<AlphaMode> for BlendState {
+    fn from(value: AlphaMode) -> Self {
+        match value {
+            AlphaMode::Blend => BlendState::ALPHA_BLENDING,
+            AlphaMode::Premultiply => BlendState::PREMULTIPLIED_ALPHA_BLENDING,
+            AlphaMode::Add => BlendState {
+                color: BlendComponent {
+                    src_factor: BlendFactor::SrcAlpha,
+                    dst_factor: BlendFactor::One,
+                    operation: BlendOperation::Add,
+                },
+                alpha: BlendComponent {
+                    src_factor: BlendFactor::Zero,
+                    dst_factor: BlendFactor::One,
+                    operation: BlendOperation::Add,
+                },
+            },
+            AlphaMode::Multiply => BlendState {
+                color: BlendComponent {
+                    src_factor: BlendFactor::Dst,
+                    dst_factor: BlendFactor::OneMinusSrcAlpha,
+                    operation: BlendOperation::Add,
+                },
+                alpha: BlendComponent::OVER,
+            },
+            _ => BlendState::ALPHA_BLENDING,
+        }
+    }
+}
+
 /// Asset describing a visual effect.
 ///
 /// The effect can be instanciated with a [`ParticleEffect`] component, or a
@@ -824,7 +855,7 @@ impl EffectAsset {
     /// Build the particle layout of the asset based on its modifiers.
     ///
     /// This method calculates the particle layout of the effect based on the
-    /// currently existing particles, and return it as a newly allocated
+    /// currently existing modifiers, and return it as a newly allocated
     /// [`ParticleLayout`] object.
     pub fn particle_layout(&self) -> ParticleLayout {
         // Build the set of unique attributes required for all modifiers
@@ -1166,4 +1197,40 @@ mod tests {
             effect_serde.render_modifiers().count()
         );
     }
+
+    #[test]
+    fn alpha_mode_blend_state() {
+        assert_eq!(BlendState::ALPHA_BLENDING, AlphaMode::Blend.into());
+        assert_eq!(
+            BlendState::PREMULTIPLIED_ALPHA_BLENDING,
+            AlphaMode::Premultiply.into()
+        );
+
+        let blend_state = BlendState {
+            color: BlendComponent {
+                src_factor: BlendFactor::SrcAlpha,
+                dst_factor: BlendFactor::One,
+                operation: BlendOperation::Add,
+            },
+            alpha: BlendComponent {
+                src_factor: BlendFactor::Zero,
+                dst_factor: BlendFactor::One,
+                operation: BlendOperation::Add,
+            },
+        };
+        assert_eq!(blend_state, AlphaMode::Add.into());
+
+        let blend_state = BlendState {
+            color: BlendComponent {
+                src_factor: BlendFactor::Dst,
+                dst_factor: BlendFactor::OneMinusSrcAlpha,
+                operation: BlendOperation::Add,
+            },
+            alpha: BlendComponent::OVER,
+        };
+        assert_eq!(blend_state, AlphaMode::Multiply.into());
+
+        let expr = Module::default().lit(0.5);
+        assert_eq!(BlendState::ALPHA_BLENDING, AlphaMode::Mask(expr).into());
+    }
 }
diff --git a/src/plugin.rs b/src/plugin.rs
index 73be958d..1df336be 100644
--- a/src/plugin.rs
+++ b/src/plugin.rs
@@ -23,11 +23,11 @@ use crate::{
     properties::EffectProperties,
     render::{
         extract_effect_events, extract_effects, prepare_bind_groups, prepare_effects,
-        prepare_resources, queue_effects, DispatchIndirectPipeline, DrawEffects, EffectAssetEvents,
-        EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuDispatchIndirect,
-        GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect, GpuSpawnerParams,
-        ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline, ShaderCache,
-        SimParams, StorageType as _, VfxSimulateDriverNode, VfxSimulateNode,
+        prepare_gpu_resources, queue_effects, DispatchIndirectPipeline, DrawEffects,
+        EffectAssetEvents, EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects,
+        GpuDispatchIndirect, GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect,
+        GpuSpawnerParams, ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline,
+        ShaderCache, SimParams, StorageType as _, VfxSimulateDriverNode, VfxSimulateNode,
     },
     spawn::{self, Random},
     tick_initializers,
@@ -76,16 +76,24 @@ pub enum EffectSystems {
     GatherRemovedEffects,
 
     /// Prepare effect assets for the extracted effects.
+    ///
+    /// Part of Bevy's own [`RenderSet::PrepareAssets`].
     PrepareEffectAssets,
 
     /// Queue the GPU commands for the extracted effects.
+    ///
+    /// Part of Bevy's own [`RenderSet::Queue`].
     QueueEffects,
 
     /// Prepare GPU data for the queued effects.
+    ///
+    /// Part of Bevy's own [`RenderSet::Prepare`].
     PrepareEffectGpuResources,
 
     /// Prepare the GPU bind groups once all buffers have been (re-)allocated
     /// and won't change this frame.
+    ///
+    /// Part of Bevy's own [`RenderSet::PrepareBindGroups`].
     PrepareBindGroups,
 }
 
@@ -299,7 +307,7 @@ impl Plugin for HanabiPlugin {
                     queue_effects
                         .in_set(EffectSystems::QueueEffects)
                         .after(prepare_effects),
-                    prepare_resources
+                    prepare_gpu_resources
                         .in_set(EffectSystems::PrepareEffectGpuResources)
                         .after(prepare_view_uniforms),
                     prepare_bind_groups
diff --git a/src/render/aligned_buffer_vec.rs b/src/render/aligned_buffer_vec.rs
index 2b584b2a..c9d00bde 100644
--- a/src/render/aligned_buffer_vec.rs
+++ b/src/render/aligned_buffer_vec.rs
@@ -14,12 +14,22 @@ use copyless::VecHelper;
 
 use crate::next_multiple_of;
 
-/// Like Bevy's [`BufferVec`], but with correct item alignment.
+/// Like Bevy's [`BufferVec`], but with extra per-item alignment.
 ///
-/// This is a helper to ensure the data is properly aligned when copied to GPU,
-/// depending on the device constraints and the WGSL rules. Generally the
-/// alignment is one of the [`WgpuLimits`], and is also ensured to be
-/// compatible with WGSL.
+/// This helper ensures the individual array elements are properly aligned,
+/// depending on the device constraints and the WGSL rules. In general using
+/// [`BufferVec`] is enough to ensure alignment; however when some array items
+/// also need to be bound individually, then each item (not only the array
+/// itself) needs to be aligned to the device requirements. This is admittedly a
+/// very specific case, because the device alignment might be very large (256
+/// bytes) and this causes a lot of wasted space (padding per-element, instead
+/// of padding for the entire array).
+///
+/// For this buffer to work correctly and items to be bindable individually,
+/// the alignment must come from one of the [`WgpuLimits`]. For example, for a
+/// storage buffer, to be able to bind the entire buffer but also any subset of
+/// it (including individual elements), the extra alignment must
+/// be [`WgpuLimits::min_storage_buffer_offset_alignment`].
 ///
 /// The element type `T` needs to implement the following traits:
 /// - [`Pod`] to allow copy.
@@ -48,7 +58,7 @@ pub struct AlignedBufferVec<T: Pod + ShaderSize> {
     label: Option<String>,
 }
 
-impl<T: Pod + ShaderType + ShaderSize> Default for AlignedBufferVec<T> {
+impl<T: Pod + ShaderSize> Default for AlignedBufferVec<T> {
     fn default() -> Self {
         let item_size = std::mem::size_of::<T>();
         let aligned_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
@@ -65,7 +75,7 @@ impl<T: Pod + ShaderType + ShaderSize> Default for AlignedBufferVec<T> {
     }
 }
 
-impl<T: Pod + ShaderType + ShaderSize> AlignedBufferVec<T> {
+impl<T: Pod + ShaderSize> AlignedBufferVec<T> {
     /// Create a new collection.
     ///
     /// `item_align` is an optional additional alignment for items in the
@@ -143,6 +153,7 @@ impl<T: Pod + ShaderType + ShaderSize> AlignedBufferVec<T> {
         self.values.is_empty()
     }
 
+    /// Append a value to the buffer.
     pub fn push(&mut self, value: T) -> usize {
         let index = self.values.len();
         self.values.alloc().init(value);
@@ -151,10 +162,16 @@ impl<T: Pod + ShaderType + ShaderSize> AlignedBufferVec<T> {
 
     /// Reserve some capacity into the buffer.
     ///
+    /// If the buffer is reallocated, the old content (on the GPU) is lost, and
+    /// needs to be re-uploaded to the newly-created buffer. This is done with
+    /// [`write_buffer()`].
+    ///
     /// # Returns
     ///
     /// `true` if the buffer was (re)allocated, or `false` if an existing buffer
     /// was reused which already had enough capacity.
+    ///
+    /// [`write_buffer()`]: AlignedBufferVec::write_buffer
     pub fn reserve(&mut self, capacity: usize, device: &RenderDevice) -> bool {
         if capacity > self.capacity {
             let size = self.aligned_size * capacity;
@@ -217,7 +234,7 @@ impl<T: Pod + ShaderType + ShaderSize> AlignedBufferVec<T> {
     }
 }
 
-impl<T: Pod + ShaderType + ShaderSize> std::ops::Index<usize> for AlignedBufferVec<T> {
+impl<T: Pod + ShaderSize> std::ops::Index<usize> for AlignedBufferVec<T> {
     type Output = T;
 
     fn index(&self, index: usize) -> &Self::Output {
@@ -225,7 +242,7 @@ impl<T: Pod + ShaderType + ShaderSize> std::ops::Index<usize> for AlignedBufferV
     }
 }
 
-impl<T: Pod + ShaderType + ShaderSize> std::ops::IndexMut<usize> for AlignedBufferVec<T> {
+impl<T: Pod + ShaderSize> std::ops::IndexMut<usize> for AlignedBufferVec<T> {
     fn index_mut(&mut self, index: usize) -> &mut Self::Output {
         &mut self.values[index]
     }
diff --git a/src/render/mod.rs b/src/render/mod.rs
index 0b38b474..bf643fab 100644
--- a/src/render/mod.rs
+++ b/src/render/mod.rs
@@ -1,6 +1,7 @@
 use std::{
     borrow::Cow,
     num::{NonZero, NonZeroU32, NonZeroU64},
+    ops::Deref,
 };
 use std::{iter, marker::PhantomData};
 
@@ -148,7 +149,14 @@ impl Default for GpuSimParams {
 }
 
 impl From<SimParams> for GpuSimParams {
+    #[inline]
     fn from(src: SimParams) -> Self {
+        Self::from(&src)
+    }
+}
+
+impl From<&SimParams> for GpuSimParams {
+    fn from(src: &SimParams) -> Self {
         Self {
             delta_time: src.delta_time,
             time: src.time as f32,
@@ -217,7 +225,10 @@ impl GpuCompressedTransform {
 
 /// Extension trait for shader types stored in a WGSL storage buffer.
 pub(crate) trait StorageType {
-    /// Get the aligned size of this type based on the given alignment in bytes.
+    /// Get the aligned size, in bytes, of this type such that it aligns to the
+    /// given alignment, in bytes.
+    ///
+    /// This is mainly used to align GPU types to device requirements.
     fn aligned_size(alignment: u32) -> NonZeroU64;
 
     /// Get the WGSL padding code to append to the GPU struct to align it.
@@ -237,9 +248,10 @@ impl<T: ShaderType> StorageType for T {
     fn padding_code(alignment: u32) -> String {
         let aligned_size = T::aligned_size(alignment);
         trace!(
-            "Aligning {} to {} bytes as device limits requires. Aligned size: {} bytes.",
-            stringify!(T),
+            "Aligning {} to {} bytes as device limits requires. Orignal size: {} bytes. Aligned size: {} bytes.",
+            std::any::type_name::<T>(),
             alignment,
+            T::min_size().get(),
             aligned_size
         );
 
@@ -284,7 +296,6 @@ pub(crate) struct GpuSpawnerParams {
     pad: [u32; 3],
 }
 
-// FIXME - min_storage_buffer_offset_alignment
 #[repr(C)]
 #[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)]
 pub struct GpuDispatchIndirect {
@@ -345,8 +356,8 @@ pub struct GpuParticleGroup {
     pub indirect_index: u32,
     /// The capacity of this group in number of particles.
     pub capacity: u32,
-    // The index of the first particle in this effect in the particle and
-    // indirect buffers.
+    /// The index of the first particle in this effect in the particle and
+    /// indirect buffers.
     pub effect_particle_offset: u32,
 }
 
@@ -542,7 +553,7 @@ pub(crate) struct ParticlesInitPipeline {
     render_indirect_layout: BindGroupLayout,
 }
 
-#[derive(Clone, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub(crate) struct ParticleInitPipelineKey {
     shader: Handle<Shader>,
     particle_layout_min_binding_size: NonZero<u64>,
@@ -551,7 +562,7 @@ pub(crate) struct ParticleInitPipelineKey {
 }
 
 bitflags! {
-    #[derive(Clone, Copy, PartialEq, Eq, Hash)]
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
     pub struct ParticleInitPipelineKeyFlags: u8 {
         const CLONE = 0x1;
         const ATTRIBUTE_PREV = 0x2;
@@ -746,7 +757,7 @@ impl FromWorld for ParticlesUpdatePipeline {
     }
 }
 
-#[derive(Default, Clone, Hash, PartialEq, Eq)]
+#[derive(Debug, Default, Clone, Hash, PartialEq, Eq)]
 pub(crate) struct ParticleUpdatePipelineKey {
     /// Compute shader, with snippets applied, but not preprocessed yet.
     shader: Handle<Shader>,
@@ -1156,32 +1167,6 @@ impl SpecializedRenderPipeline for ParticlesRenderPipeline {
             TextureFormat::bevy_default()
         };
 
-        let blend_state = match key.alpha_mode {
-            AlphaMode::Blend => BlendState::ALPHA_BLENDING,
-            AlphaMode::Premultiply => BlendState::PREMULTIPLIED_ALPHA_BLENDING,
-            AlphaMode::Add => BlendState {
-                color: BlendComponent {
-                    src_factor: BlendFactor::SrcAlpha,
-                    dst_factor: BlendFactor::One,
-                    operation: BlendOperation::Add,
-                },
-                alpha: BlendComponent {
-                    src_factor: BlendFactor::Zero,
-                    dst_factor: BlendFactor::One,
-                    operation: BlendOperation::Add,
-                },
-            },
-            AlphaMode::Multiply => BlendState {
-                color: BlendComponent {
-                    src_factor: BlendFactor::Dst,
-                    dst_factor: BlendFactor::OneMinusSrcAlpha,
-                    operation: BlendOperation::Add,
-                },
-                alpha: BlendComponent::OVER,
-            },
-            _ => BlendState::ALPHA_BLENDING,
-        };
-
         RenderPipelineDescriptor {
             vertex: VertexState {
                 shader: key.shader.clone(),
@@ -1195,7 +1180,7 @@ impl SpecializedRenderPipeline for ParticlesRenderPipeline {
                 entry_point: "fragment".into(),
                 targets: vec![Some(ColorTargetState {
                     format,
-                    blend: Some(blend_state),
+                    blend: Some(key.alpha_mode.into()),
                     write_mask: ColorWrites::ALL,
                 })],
             }),
@@ -1397,7 +1382,7 @@ pub(crate) fn extract_effects(
                 acc
             });
     trace!(
-        "Found {} removed entities.",
+        "Found {} removed effect(s).",
         extracted_effects.removed_effect_entities.len()
     );
 
@@ -1405,9 +1390,9 @@ pub(crate) fn extract_effects(
     extracted_effects.added_effects = query
         .p1()
         .iter()
-        .filter_map(|(entity, effect)| {
-            let handle = effect.asset.clone_weak();
-            let asset = effects.get(&effect.asset)?;
+        .filter_map(|(entity, compiled_effect)| {
+            let handle = compiled_effect.asset.clone_weak();
+            let asset = effects.get(&compiled_effect.asset)?;
             let particle_layout = asset.particle_layout();
             assert!(
                 particle_layout.size() > 0,
@@ -1425,7 +1410,7 @@ pub(crate) fn extract_effects(
                  asset.capacities(),
                  particle_layout,
                  property_layout,
-                 effect.layout_flags);
+                 compiled_effect.layout_flags);
 
             Some(AddedEffect {
                 entity,
@@ -1441,13 +1426,13 @@ pub(crate) fn extract_effects(
                 particle_layout,
                 property_layout,
                 group_order,
-                layout_flags: effect.layout_flags,
+                layout_flags: compiled_effect.layout_flags,
                 handle,
             })
         })
         .collect();
 
-    // Loop over all existing effects to update them
+    // Loop over all existing effects to extract them
     extracted_effects.effects.clear();
     for (
         entity,
@@ -1460,7 +1445,7 @@ pub(crate) fn extract_effects(
     ) in query.p0().iter_mut()
     {
         // Check if shaders are configured
-        let effect_shaders = effect.get_configured_shaders().to_vec();
+        let effect_shaders = effect.get_configured_shaders();
         if effect_shaders.is_empty() {
             continue;
         }
@@ -1532,7 +1517,7 @@ pub(crate) fn extract_effects(
                 texture_layout,
                 textures: effect.textures.clone(),
                 alpha_mode,
-                effect_shaders,
+                effect_shaders: effect_shaders.to_vec(),
                 #[cfg(feature = "2d")]
                 z_sort_key_2d,
             },
@@ -1557,17 +1542,29 @@ struct GpuLimits {
     ///
     /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment
     storage_buffer_align: NonZeroU32,
+
     /// Size of [`GpuDispatchIndirect`] aligned to the contraint of
     /// [`WgpuLimits::min_storage_buffer_offset_alignment`].
     ///
     /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment
     dispatch_indirect_aligned_size: NonZeroU32,
+
+    /// Size of [`GpuRenderEffectMetadata`] aligned to the constraint of
+    /// [`WgpuLimits::min_storage_buffer_offset_alignment`].
+    ///
+    /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment
     render_effect_indirect_aligned_size: NonZeroU32,
-    /// Size of [`GpuRenderIndirect`] aligned to the contraint of
+
+    /// Size of [`GpuRenderGroupIndirect`] aligned to the constraint of
     /// [`WgpuLimits::min_storage_buffer_offset_alignment`].
     ///
     /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment
     render_group_indirect_aligned_size: NonZeroU32,
+
+    /// Size of [`GpuParticleGroup`] aligned to the constraint of
+    /// [`WgpuLimits::min_storage_buffer_offset_alignment`].
+    ///
+    /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment
     particle_group_aligned_size: NonZeroU32,
 }
 
@@ -1627,23 +1624,27 @@ impl GpuLimits {
         self.dispatch_indirect_aligned_size.get() * buffer_index
     }
 
-    /// Byte alignment for [`GpuRenderEffectMetadata`].
+    /// Byte offset of the [`GpuRenderEffectMetadata`] of a given buffer.
     pub fn render_effect_indirect_offset(&self, buffer_index: u32) -> u64 {
         self.render_effect_indirect_aligned_size.get() as u64 * buffer_index as u64
     }
+
+    /// Aligned byte size of [`GpuRenderEffectMetadata`].
     pub fn render_effect_indirect_size(&self) -> NonZeroU64 {
         NonZeroU64::new(self.render_effect_indirect_aligned_size.get() as u64).unwrap()
     }
 
-    /// Byte alignment for [`GpuRenderGroupIndirect`].
+    /// Byte offset for the [`GpuRenderGroupIndirect`] of a given buffer.
     pub fn render_group_indirect_offset(&self, buffer_index: u32) -> u64 {
         self.render_group_indirect_aligned_size.get() as u64 * buffer_index as u64
     }
+
+    /// Aligned byte size of [`GpuRenderGroupIndirect`].
     pub fn render_group_indirect_size(&self) -> NonZeroU64 {
         NonZeroU64::new(self.render_group_indirect_aligned_size.get() as u64).unwrap()
     }
 
-    /// Byte alignment for [`GpuParticleGroup`].
+    /// Byte offset for the [`GpuParticleGroup`] of a given buffer.
     pub fn particle_group_offset(&self, buffer_index: u32) -> u32 {
         self.particle_group_aligned_size.get() * buffer_index
     }
@@ -1662,8 +1663,9 @@ struct CacheEntry {
 /// effects.
 #[derive(Resource)]
 pub struct EffectsMeta {
-    /// Map from an entity with a [`ParticleEffect`] component attached to it,
-    /// to the associated effect slice allocated in the [`EffectCache`].
+    /// Map from an entity of the main world with a [`ParticleEffect`] component
+    /// attached to it, to the associated effect slice allocated in the
+    /// [`EffectCache`].
     ///
     /// [`ParticleEffect`]: crate::ParticleEffect
     entity_map: HashMap<Entity, CacheEntry>,
@@ -1682,12 +1684,17 @@ pub struct EffectsMeta {
     /// Bind group #3 of the vfx_init shader, containing the indirect render
     /// buffer.
     init_render_indirect_bind_group: Option<BindGroup>,
-
+    /// Global shared GPU uniform buffer storing the simulation parameters,
+    /// uploaded each frame from CPU to GPU.
     sim_params_uniforms: UniformBuffer<GpuSimParams>,
+    /// Global shared GPU buffer storing the various spawner parameter structs
+    /// for the active effect instances.
     spawner_buffer: AlignedBufferVec<GpuSpawnerParams>,
+    /// Global shared GPU buffer storing the various indirect dispatch structs
+    /// for the indirect dispatch of the Update pass.
     dispatch_indirect_buffer: BufferTable<GpuDispatchIndirect>,
-    /// Stores the GPU `RenderEffectMetadata` structures, which describe mutable
-    /// data relating to the entire effect.
+    /// Global shared GPU buffer storing the various `RenderEffectMetadata`
+    /// structs for the active effect instances.
     render_effect_dispatch_buffer: BufferTable<GpuRenderEffectMetadata>,
     /// Stores the GPU `RenderGroupIndirect` structures, which describe mutable
     /// data specific to a particle group.
@@ -1892,6 +1899,8 @@ impl EffectsMeta {
                 trail_dispatch_buffer_indices,
             };
 
+            // Insert the effect into the cache. This will allocate all the necessary GPU
+            // resources as needed.
             let cache_id = effect_cache.insert(
                 added_effect.handle,
                 added_effect
@@ -2006,8 +2015,6 @@ pub(crate) fn prepare_effects(
     update_pipeline: Res<ParticlesUpdatePipeline>,
     mut specialized_init_pipelines: ResMut<SpecializedComputePipelines<ParticlesInitPipeline>>,
     mut specialized_update_pipelines: ResMut<SpecializedComputePipelines<ParticlesUpdatePipeline>>,
-    // update_pipeline: Res<ParticlesUpdatePipeline>, // TODO move update_pipeline.pipeline to
-    // EffectsMeta
     mut effects_meta: ResMut<EffectsMeta>,
     mut effect_cache: ResMut<EffectCache>,
     mut extracted_effects: ResMut<ExtractedEffects>,
@@ -2103,8 +2110,8 @@ pub(crate) fn prepare_effects(
     // information, and the proper ordering implementation.
     // effect_entity_list.sort_by_key(|a| a.effect_slice.clone());
 
-    // Loop on all extracted effects in order and try to batch them together to
-    // reduce draw calls
+    // Loop on all extracted effects in order, and try to batch them together to
+    // reduce draw calls.
     effects_meta.spawner_buffer.clear();
     effects_meta.particle_group_buffer.clear();
     let mut total_group_count = 0;
@@ -2346,12 +2353,9 @@ pub(crate) fn prepare_effects(
     // Update simulation parameters
     effects_meta
         .sim_params_uniforms
-        .set(GpuSimParams::default());
+        .set(sim_params.deref().into());
     {
         let gpu_sim_params = effects_meta.sim_params_uniforms.get_mut();
-        let sim_params = *sim_params;
-        *gpu_sim_params = sim_params.into();
-
         gpu_sim_params.num_groups = total_group_count;
 
         trace!(
@@ -2366,7 +2370,6 @@ pub(crate) fn prepare_effects(
             gpu_sim_params.num_groups,
         );
     }
-    // FIXME - There's no simple way to tell if write_buffer() reallocates...
     let prev_buffer_id = effects_meta.sim_params_uniforms.buffer().map(|b| b.id());
     effects_meta
         .sim_params_uniforms
@@ -2377,7 +2380,11 @@ pub(crate) fn prepare_effects(
     }
 }
 
-/// The per-buffer bind group for the GPU particle buffer.
+/// Per-buffer bind groups for a GPU effect buffer.
+///
+/// This contains all bind groups specific to a single [`EffectBuffer`].
+///
+/// [`EffectBuffer`]: crate::render::effect_cache::EffectBuffer
 pub(crate) struct BufferBindGroups {
     /// Bind group for the render graphic shader.
     ///
@@ -2409,14 +2416,15 @@ impl Material {
             .iter()
             .enumerate()
             .flat_map(|(index, id)| {
+                let base_binding = index as u32 * 2;
                 if let Some(gpu_image) = gpu_images.get(*id) {
                     vec![
                         BindGroupEntry {
-                            binding: index as u32 * 2,
+                            binding: base_binding,
                             resource: BindingResource::TextureView(&gpu_image.texture_view),
                         },
                         BindGroupEntry {
-                            binding: index as u32 * 2 + 1,
+                            binding: base_binding + 1,
                             resource: BindingResource::Sampler(&gpu_image.sampler),
                         },
                     ]
@@ -2982,17 +2990,18 @@ pub(crate) fn queue_effects(
 
 /// Prepare GPU resources for effect rendering.
 ///
-/// This system runs in the [`Prepare`] render set, after Bevy has updated the
-/// [`ViewUniforms`], which need to be referenced to get access to the current
-/// camera view.
-pub(crate) fn prepare_resources(
+/// This system runs in the [`RenderSet::Prepare`] render set, after Bevy has
+/// updated the [`ViewUniforms`], which need to be referenced to get access to
+/// the current camera view.
+pub(crate) fn prepare_gpu_resources(
     mut effects_meta: ResMut<EffectsMeta>,
     render_device: Res<RenderDevice>,
     view_uniforms: Res<ViewUniforms>,
     render_pipeline: Res<ParticlesRenderPipeline>,
 ) {
     // Get the binding for the ViewUniform, the uniform data structure containing
-    // the Camera data for the current view.
+    // the Camera data for the current view. If not available, we cannot render
+    // anything.
     let Some(view_binding) = view_uniforms.uniforms.binding() else {
         return;
     };
@@ -3198,8 +3207,8 @@ pub(crate) fn prepare_bind_groups(
         return;
     };
 
-    // Create the per-effect render bind groups
-    trace!("Create per-effect render bind groups...");
+    // Create the per-buffer bind groups
+    trace!("Create per-buffer bind groups...");
     for (buffer_index, buffer) in effect_cache.buffers().iter().enumerate() {
         #[cfg(feature = "trace")]
         let _span_buffer = bevy::utils::tracing::info_span!("create_buffer_bind_groups").entered();
@@ -3212,9 +3221,9 @@ pub(crate) fn prepare_bind_groups(
             continue;
         };
 
-        // Ensure all effect groups have a bind group for the entire buffer of the
-        // group, since the update phase runs on an entire group/buffer at once,
-        // with all the effect instances in it batched together.
+        // Ensure all effects in this batch have a bind group for the entire buffer of
+        // the group, since the update phase runs on an entire group/buffer at
+        // once, with all the effect instances in it batched together.
         trace!("effect particle buffer_index=#{}", buffer_index);
         effect_bind_groups
             .particle_buffers
@@ -3260,7 +3269,7 @@ pub(crate) fn prepare_bind_groups(
                 }
                 trace!("Creating render bind group with {} entries (layout flags: {:?})", entries.len(), buffer.layout_flags());
                 let render = render_device.create_bind_group(
-                    &format!("hanabi:bind_group_render_vfx{buffer_index}_particles")[..],
+                    &format!("hanabi:bind_group:render_vfx{buffer_index}_particles")[..],
                      buffer.particle_layout_bind_group_with_dispatch(),
                      &entries,
                 );
@@ -3886,7 +3895,7 @@ impl Node for VfxSimulateNode {
         // let mut total_group_count = 0;
         {
             {
-                trace!("loop over effect batches...");
+                trace!("init: loop over effect batches...");
 
                 // Dispatch init compute jobs
                 for (entity, batches) in self.effect_query.iter_manual(world) {
@@ -4165,7 +4174,8 @@ impl Node for VfxSimulateNode {
             && effects_meta.dr_indirect_bind_group.is_some()
             && effects_meta.sim_params_bind_group.is_some()
         {
-            // Only if there's an effect
+            // Only start a compute pass if there's an effect; makes things clearer in
+            // debugger.
             let mut compute_pass =
                 render_context
                     .command_encoder()
diff --git a/src/render/vfx_common.wgsl b/src/render/vfx_common.wgsl
index 37357dc0..c4bb56e4 100644
--- a/src/render/vfx_common.wgsl
+++ b/src/render/vfx_common.wgsl
@@ -18,9 +18,16 @@ struct SimParams {
 }
 
 struct Spawner {
+    /// Compressed transform of the emitter.
     transform: mat3x4<f32>, // transposed (row-major)
+    /// Inverse compressed transform of the emitter.
     inverse_transform: mat3x4<f32>, // transposed (row-major)
+    /// Number of particles to spawn this frame, as calculated by the CPU Spawner.
+    ///
+    /// This is only used if the effect is not a child effect (i.e. one driven by GPU events).
     spawn: i32,
+    /// PRNG seed for this effect instance. Currently this can change each time the
+    /// effect is recompiled, and cannot be set deterministically (TODO).
     seed: u32,
     // Can't use storage<read> with atomics
 #ifdef SPAWNER_READONLY
@@ -28,6 +35,10 @@ struct Spawner {
 #else
     count: atomic<i32>,
 #endif
+    /// Global index of the effect in the various shared buffers.
+    ///
+    /// This is a globally unique index for all active effect instances, used to index
+    /// global buffers like the spawner buffer or the render indirect dispatch buffer.
     effect_index: u32,
     // The lifetime to initialize particles with. This is only used for cloners
     // (i.e. trails or ribbons).
@@ -70,8 +81,11 @@ const DI_OFFSET_PONG: u32 = 3u;
 
 /// Dispatch indirect parameters for GPU driven update compute.
 struct DispatchIndirect {
+    /// Number of workgroups. This is derived from the number of particles to update.
     x: u32,
+    /// Unused; always 1.
     y: u32,
+    /// Unused; always 1.
     z: u32,
     /// Index of the ping-pong buffer of particle indices to read particles from
     /// during rendering. Cached from RenderIndirect::ping after it's swapped
@@ -79,6 +93,10 @@ struct DispatchIndirect {
     /// as an indirect draw source so cannot also be bound as regular storage
     /// buffer for reading.
     pong: u32,
+    /// Padding for storage buffer alignment. This struct is sometimes bound as part
+    /// of an array, or sometimes individually as a single unit. In the latter case,
+    /// we need it to be aligned to the GPU limits of the device. That limit is only
+    /// known at runtime when initializing the WebGPU device.
     {{DISPATCH_INDIRECT_PADDING}}
 }
 
@@ -127,7 +145,15 @@ struct RenderGroupIndirect {
     /// Number of dead particles, decremented during the init pass as new particles
     /// are spawned, and incremented during the update pass as existing particles die.
     dead_count: atomic<u32>,
+    /// Maximum number of init threads to run on next frame. This is cached from
+    /// `dead_count` during the indirect dispatch of the previous frame, so that the
+    /// init compute pass can cap its thread count while also decrementing the actual
+    /// `dead_count` as particles are spawned.
     max_spawn: atomic<u32>,
+    /// Padding for storage buffer alignment. This struct is sometimes bound as part
+    /// of an array, or sometimes individually as a single unit. In the latter case,
+    /// we need it to be aligned to the GPU limits of the device. That limit is only
+    /// known at runtime when initializing the WebGPU device.
     {{RENDER_GROUP_INDIRECT_PADDING}}
 }
 
diff --git a/src/spawn.rs b/src/spawn.rs
index ac18d933..10b52d70 100644
--- a/src/spawn.rs
+++ b/src/spawn.rs
@@ -1085,6 +1085,7 @@ mod test {
     fn test_once() {
         let rng = &mut new_rng();
         let spawner = Spawner::once(5.0.into(), true);
+        assert!(spawner.is_once());
         let mut spawner = make_effect_spawner(spawner);
         let count = spawner.tick(0.001, rng);
         assert_eq!(count, 5);
@@ -1096,6 +1097,7 @@ mod test {
     fn test_once_reset() {
         let rng = &mut new_rng();
         let spawner = Spawner::once(5.0.into(), true);
+        assert!(spawner.is_once());
         let mut spawner = make_effect_spawner(spawner);
         spawner.tick(1.0, rng);
         spawner.reset();
@@ -1107,6 +1109,7 @@ mod test {
     fn test_once_not_immediate() {
         let rng = &mut new_rng();
         let spawner = Spawner::once(5.0.into(), false);
+        assert!(spawner.is_once());
         let mut spawner = make_effect_spawner(spawner);
         let count = spawner.tick(1.0, rng);
         assert_eq!(count, 0);
@@ -1119,6 +1122,7 @@ mod test {
     fn test_rate() {
         let rng = &mut new_rng();
         let spawner = Spawner::rate(5.0.into());
+        assert!(!spawner.is_once());
         let mut spawner = make_effect_spawner(spawner);
         // Slightly over 1.0 to avoid edge case
         let count = spawner.tick(1.01, rng);
@@ -1131,6 +1135,7 @@ mod test {
     fn test_rate_active() {
         let rng = &mut new_rng();
         let spawner = Spawner::rate(5.0.into());
+        assert!(!spawner.is_once());
         let mut spawner = make_effect_spawner(spawner);
         spawner.tick(1.01, rng);
         spawner.set_active(false);
@@ -1147,6 +1152,7 @@ mod test {
     fn test_rate_accumulate() {
         let rng = &mut new_rng();
         let spawner = Spawner::rate(5.0.into());
+        assert!(!spawner.is_once());
         let mut spawner = make_effect_spawner(spawner);
         // 13 ticks instead of 12 to avoid edge case
         let count = (0..13).map(|_| spawner.tick(1.0 / 60.0, rng)).sum::<u32>();
@@ -1157,6 +1163,7 @@ mod test {
     fn test_burst() {
         let rng = &mut new_rng();
         let spawner = Spawner::burst(5.0.into(), 2.0.into());
+        assert!(!spawner.is_once());
         let mut spawner = make_effect_spawner(spawner);
         let count = spawner.tick(1.0, rng);
         assert_eq!(count, 5);