Fix storage padding and re-enable buffer clean-up (#324)

Fix the padding of all storage buffers to comply with the GPU device requirements, which on most Desktop platforms is 32 bytes, but can be different (for example on macOS M1 and later, it's generally 256 bytes). This fixes a number of issues where effects are not rendering at all, or display strong artifacts. The change is likely over-doing it by padding all structures used in arrays, however this can be optimized as a later pass. Fix a bug in the `CloneModifier` where the shader code was allocating cloned particles without checking the buffer capacity, leading to underflow error and stomping over other parts of the buffer. Re-enable buffer cleaning up on effect deletion, which was commented out during the ribbon change but not fixed, and left disabled. This caused the buffers to fill, leaving no space for new effects even after some effects were deleting, which resulted in newly spawned effects not updating (code was dispatching 0 workground in update pass). Fixes #322
djeedai · May 18, 2024 · eb669f5 · eb669f5
1 parent 2da6d8d
commit eb669f5
Show file tree

Hide file tree

Showing 7 changed files with 356 additions and 96 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -147,19 +147,19 @@ jobs:
           for example in .github/example-run/3d/*.ron; do
             example_name=`basename $example .ron`
             echo "running $example_name - "`date`
-            time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr 3d bevy/bevy_ci_testing"
+            time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/bevy_ui bevy/default_font 3d bevy/bevy_ci_testing"
             sleep 10
           done
           for example in .github/example-run/3dpng/*.ron; do
             example_name=`basename $example .ron`
             echo "running $example_name - "`date`
-            time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/png 3d bevy/bevy_ci_testing"
+            time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/bevy_ui bevy/default_font bevy/png 3d bevy/bevy_ci_testing"
             sleep 10
           done
           for example in .github/example-run/2d/*.ron; do
             example_name=`basename $example .ron`
             echo "running $example_name - "`date`
-            time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_sprite 2d bevy/bevy_ci_testing"
+            time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_sprite bevy/bevy_ui bevy/default_font 2d bevy/bevy_ci_testing"
             sleep 10
           done
         env:
@@ -234,7 +234,7 @@ jobs:
       #   if: runner.os == 'linux'
       - name: Install cargo-tarpaulin
         run: |
-          RUST_BACKTRACE=1 cargo install --version 0.22.0 cargo-tarpaulin
+          RUST_BACKTRACE=1 cargo install --version 0.30.0 cargo-tarpaulin
       - name: Generate code coverage
         run: |
           RUST_BACKTRACE=1 cargo tarpaulin --engine llvm --verbose --timeout 120 --out Lcov --workspace --all-features

diff --git a/src/modifier/clone.rs b/src/modifier/clone.rs
@@ -100,6 +100,12 @@ impl Modifier for CloneModifier {
 
                     // Recycle a dead particle.
                     let dead_index = atomicSub(&render_group_indirect[{dest}u].dead_count, 1u) - 1u;
+                    // HACK - we have no limiter for dead_count, so could go negative (wrap around).
+                    // Assume that any value above 2^31 is a wrap around, undo the atomic op and return.
+                    if (dead_index >= 0xF0000000) {{
+                        atomicAdd(&render_group_indirect[{dest}u].dead_count, 1u);
+                        return;
+                    }}
                     let new_index = indirect_buffer.indices[3u * (base_index + dead_index) + 2u];
 
                     // Initialize the new particle.

diff --git a/src/plugin.rs b/src/plugin.rs
@@ -22,7 +22,8 @@ use crate::{
     render::{
         extract_effect_events, extract_effects, prepare_bind_groups, prepare_effects,
         prepare_resources, queue_effects, DispatchIndirectPipeline, DrawEffects, EffectAssetEvents,
-        EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuSpawnerParams,
+        EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuDispatchIndirect,
+        GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect, GpuSpawnerParams,
         ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline, ShaderCache,
         SimParams, VfxSimulateDriverNode, VfxSimulateNode,
     },
@@ -129,8 +130,29 @@ impl HanabiPlugin {
     pub(crate) fn make_common_shader(min_storage_buffer_offset_alignment: usize) -> Shader {
         let spawner_padding_code =
             GpuSpawnerParams::padding_code(min_storage_buffer_offset_alignment);
+        let dispatch_indirect_padding_code =
+            GpuDispatchIndirect::padding_code(min_storage_buffer_offset_alignment);
+        let render_effect_indirect_padding_code =
+            GpuRenderEffectMetadata::padding_code(min_storage_buffer_offset_alignment);
+        let render_group_indirect_padding_code =
+            GpuRenderGroupIndirect::padding_code(min_storage_buffer_offset_alignment);
+        let particle_group_padding_code =
+            GpuParticleGroup::padding_code(min_storage_buffer_offset_alignment);
         let common_code = include_str!("render/vfx_common.wgsl")
-            .replace("{{SPAWNER_PADDING}}", &spawner_padding_code);
+            .replace("{{SPAWNER_PADDING}}", &spawner_padding_code)
+            .replace(
+                "{{DISPATCH_INDIRECT_PADDING}}",
+                &dispatch_indirect_padding_code,
+            )
+            .replace(
+                "{{RENDER_EFFECT_INDIRECT_PADDING}}",
+                &render_effect_indirect_padding_code,
+            )
+            .replace(
+                "{{RENDER_GROUP_INDIRECT_PADDING}}",
+                &render_group_indirect_padding_code,
+            )
+            .replace("{{PARTICLE_GROUP_PADDING}}", &particle_group_padding_code);
         Shader::from_wgsl(
             common_code,
             std::path::Path::new(file!())

diff --git a/src/render/buffer_table.rs b/src/render/buffer_table.rs
@@ -167,7 +167,8 @@ impl<T: Pod + ShaderSize> BufferTable<T> {
             item_size
         };
         trace!(
-            "BufferTable: item_size={} aligned_size={}",
+            "BufferTable[\"{}\"]: item_size={} aligned_size={}",
+            label.as_ref().unwrap_or(&String::new()),
             item_size,
             aligned_size
         );

diff --git a/src/render/effect_cache.rs b/src/render/effect_cache.rs
@@ -57,12 +57,12 @@ impl PartialOrd for EffectSlices {
 /// Describes all particle groups' slices of particles in the particle buffer
 /// for a single effect, as well as the [`DispatchBufferIndices`].
 pub struct SlicesRef {
-    ranges: Vec<u32>,
+    pub ranges: Vec<u32>,
     /// Size of a single item in the slice. Currently equal to the unique size
     /// of all items in an [`EffectBuffer`] (no mixed size supported in same
     /// buffer), so cached only for convenience.
     particle_layout: ParticleLayout,
-    dispatch_buffer_indices: DispatchBufferIndices,
+    pub dispatch_buffer_indices: DispatchBufferIndices,
 }
 
 /// A reference to a slice allocated inside an [`EffectBuffer`].
@@ -239,7 +239,12 @@ impl EffectBuffer {
 
         // TODO - Cache particle_layout and associated bind group layout, instead of
         // creating one bind group layout per buffer using that layout...
+        let particle_group_size = NonZeroU64::new(GpuParticleGroup::aligned_size(
+            render_device.limits().min_storage_buffer_offset_alignment as usize,
+        ) as u64)
+        .unwrap();
         let mut entries = vec![
+            // @binding(0) var<storage, read_write> particle_buffer : ParticleBuffer
             BindGroupLayoutEntry {
                 binding: 0,
                 visibility: ShaderStages::COMPUTE,
@@ -250,6 +255,7 @@ impl EffectBuffer {
                 },
                 count: None,
             },
+            // @binding(1) var<storage, read_write> indirect_buffer : IndirectBuffer
             BindGroupLayoutEntry {
                 binding: 1,
                 visibility: ShaderStages::COMPUTE,
@@ -260,13 +266,16 @@ impl EffectBuffer {
                 },
                 count: None,
             },
+            // @binding(2) var<storage, read> particle_groups : array<ParticleGroup>
             BindGroupLayoutEntry {
                 binding: 2,
                 visibility: ShaderStages::COMPUTE,
                 ty: BindingType::Buffer {
                     ty: BufferBindingType::Storage { read_only: true },
                     has_dynamic_offset: false,
-                    min_binding_size: Some(GpuParticleGroup::min_size()),
+                    // Despite no dynamic offset, we do bind a non-zero offset sometimes,
+                    // so keep this aligned
+                    min_binding_size: Some(particle_group_size),
                 },
                 count: None,
             },
@@ -292,6 +301,10 @@ impl EffectBuffer {
         let particles_buffer_layout_sim = render_device.create_bind_group_layout(label, &entries);
 
         // Create the render layout.
+        let dispatch_indirect_size = NonZeroU64::new(GpuDispatchIndirect::aligned_size(
+            render_device.limits().min_storage_buffer_offset_alignment as usize,
+        ) as u64)
+        .unwrap();
         let mut entries = vec![
             BindGroupLayoutEntry {
                 binding: 0,
@@ -319,7 +332,7 @@ impl EffectBuffer {
                 ty: BindingType::Buffer {
                     ty: BufferBindingType::Storage { read_only: true },
                     has_dynamic_offset: true,
-                    min_binding_size: Some(GpuDispatchIndirect::min_size()),
+                    min_binding_size: Some(dispatch_indirect_size),
                 },
                 count: None,
             },
@@ -785,10 +798,12 @@ impl EffectCache {
         let id = EffectCacheId::new();
 
         let mut ranges = vec![slice.range.start];
+        let group_count = capacities.len();
         for capacity in capacities {
             let start_index = ranges.last().unwrap();
             ranges.push(start_index + capacity);
         }
+        debug_assert_eq!(ranges.len(), group_count + 1);
 
         let slices = SlicesRef {
             ranges,
@@ -858,20 +873,22 @@ impl EffectCache {
 
     /// Remove an effect from the cache. If this was the last effect, drop the
     /// underlying buffer and return the index of the dropped buffer.
-    pub fn remove(&mut self, id: EffectCacheId) -> Option<u32> {
+    pub fn remove(&mut self, id: EffectCacheId) -> Option<CachedEffectIndices> {
         let indices = self.effects.remove(&id)?;
         let &mut Some(ref mut buffer) = &mut self.buffers[indices.buffer_index as usize] else {
             return None;
         };
 
         let slice = SliceRef {
             range: indices.slices.ranges[0]..*indices.slices.ranges.last().unwrap(),
-            particle_layout: indices.slices.particle_layout,
+            // FIXME: clone() needed to return CachedEffectIndices, but really we don't care about
+            // returning the ParticleLayout, so should split...
+            particle_layout: indices.slices.particle_layout.clone(),
         };
 
         if buffer.free_slice(slice) == BufferState::Free {
             self.buffers[indices.buffer_index as usize] = None;
-            return Some(indices.buffer_index);
+            return Some(indices);
         }
 
         None
@@ -1166,9 +1183,8 @@ mod gpu_tests {
         assert_eq!(slice2.slices, vec![0, capacity]);
         assert_eq!(effect_cache.buffers().len(), 2);
 
-        let buffer_index = effect_cache.remove(id1);
-        assert!(buffer_index.is_some());
-        assert_eq!(buffer_index.unwrap(), 0);
+        let cached_effect_indices = effect_cache.remove(id1).unwrap();
+        assert_eq!(cached_effect_indices.buffer_index, 0);
         assert_eq!(effect_cache.buffers().len(), 2);
         {
             let buffers = effect_cache.buffers();