Skip to content

Commit

Permalink
Fix storage padding and re-enable buffer clean-up (#324)
Browse files Browse the repository at this point in the history
Fix the padding of all storage buffers to comply with the GPU device requirements, which on most Desktop platforms is 32 bytes, but can be different (for example on macOS M1 and later, it's generally 256 bytes). This fixes a number of issues where effects are not rendering at all, or display strong artifacts. The change is likely over-doing it by padding all structures used in arrays, however this can be optimized as a later pass.

Fix a bug in the `CloneModifier` where the shader code was allocating cloned particles without checking the buffer capacity, leading to underflow error and stomping over other parts of the buffer.

Re-enable buffer cleaning up on effect deletion, which was commented out during the ribbon change but not fixed, and left disabled. This caused the buffers to fill, leaving no space for new effects even after some effects were deleting, which resulted in newly spawned effects not updating (code was dispatching 0 workground in update pass).

Fixes #322
  • Loading branch information
djeedai authored May 18, 2024
1 parent 2da6d8d commit eb669f5
Show file tree
Hide file tree
Showing 7 changed files with 356 additions and 96 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -147,19 +147,19 @@ jobs:
for example in .github/example-run/3d/*.ron; do
example_name=`basename $example .ron`
echo "running $example_name - "`date`
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr 3d bevy/bevy_ci_testing"
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/bevy_ui bevy/default_font 3d bevy/bevy_ci_testing"
sleep 10
done
for example in .github/example-run/3dpng/*.ron; do
example_name=`basename $example .ron`
echo "running $example_name - "`date`
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/png 3d bevy/bevy_ci_testing"
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_pbr bevy/bevy_ui bevy/default_font bevy/png 3d bevy/bevy_ci_testing"
sleep 10
done
for example in .github/example-run/2d/*.ron; do
example_name=`basename $example .ron`
echo "running $example_name - "`date`
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_sprite 2d bevy/bevy_ci_testing"
time WGPU_BACKEND=dx12 CI_TESTING_CONFIG=$example cargo run --example $example_name --no-default-features --features="bevy/bevy_winit bevy/bevy_sprite bevy/bevy_ui bevy/default_font 2d bevy/bevy_ci_testing"
sleep 10
done
env:
Expand Down Expand Up @@ -234,7 +234,7 @@ jobs:
# if: runner.os == 'linux'
- name: Install cargo-tarpaulin
run: |
RUST_BACKTRACE=1 cargo install --version 0.22.0 cargo-tarpaulin
RUST_BACKTRACE=1 cargo install --version 0.30.0 cargo-tarpaulin
- name: Generate code coverage
run: |
RUST_BACKTRACE=1 cargo tarpaulin --engine llvm --verbose --timeout 120 --out Lcov --workspace --all-features
Expand Down
6 changes: 6 additions & 0 deletions src/modifier/clone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ impl Modifier for CloneModifier {
// Recycle a dead particle.
let dead_index = atomicSub(&render_group_indirect[{dest}u].dead_count, 1u) - 1u;
// HACK - we have no limiter for dead_count, so could go negative (wrap around).
// Assume that any value above 2^31 is a wrap around, undo the atomic op and return.
if (dead_index >= 0xF0000000) {{
atomicAdd(&render_group_indirect[{dest}u].dead_count, 1u);
return;
}}
let new_index = indirect_buffer.indices[3u * (base_index + dead_index) + 2u];
// Initialize the new particle.
Expand Down
26 changes: 24 additions & 2 deletions src/plugin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ use crate::{
render::{
extract_effect_events, extract_effects, prepare_bind_groups, prepare_effects,
prepare_resources, queue_effects, DispatchIndirectPipeline, DrawEffects, EffectAssetEvents,
EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuSpawnerParams,
EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, GpuDispatchIndirect,
GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect, GpuSpawnerParams,
ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline, ShaderCache,
SimParams, VfxSimulateDriverNode, VfxSimulateNode,
},
Expand Down Expand Up @@ -129,8 +130,29 @@ impl HanabiPlugin {
pub(crate) fn make_common_shader(min_storage_buffer_offset_alignment: usize) -> Shader {
let spawner_padding_code =
GpuSpawnerParams::padding_code(min_storage_buffer_offset_alignment);
let dispatch_indirect_padding_code =
GpuDispatchIndirect::padding_code(min_storage_buffer_offset_alignment);
let render_effect_indirect_padding_code =
GpuRenderEffectMetadata::padding_code(min_storage_buffer_offset_alignment);
let render_group_indirect_padding_code =
GpuRenderGroupIndirect::padding_code(min_storage_buffer_offset_alignment);
let particle_group_padding_code =
GpuParticleGroup::padding_code(min_storage_buffer_offset_alignment);
let common_code = include_str!("render/vfx_common.wgsl")
.replace("{{SPAWNER_PADDING}}", &spawner_padding_code);
.replace("{{SPAWNER_PADDING}}", &spawner_padding_code)
.replace(
"{{DISPATCH_INDIRECT_PADDING}}",
&dispatch_indirect_padding_code,
)
.replace(
"{{RENDER_EFFECT_INDIRECT_PADDING}}",
&render_effect_indirect_padding_code,
)
.replace(
"{{RENDER_GROUP_INDIRECT_PADDING}}",
&render_group_indirect_padding_code,
)
.replace("{{PARTICLE_GROUP_PADDING}}", &particle_group_padding_code);
Shader::from_wgsl(
common_code,
std::path::Path::new(file!())
Expand Down
3 changes: 2 additions & 1 deletion src/render/buffer_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ impl<T: Pod + ShaderSize> BufferTable<T> {
item_size
};
trace!(
"BufferTable: item_size={} aligned_size={}",
"BufferTable[\"{}\"]: item_size={} aligned_size={}",
label.as_ref().unwrap_or(&String::new()),
item_size,
aligned_size
);
Expand Down
36 changes: 26 additions & 10 deletions src/render/effect_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ impl PartialOrd for EffectSlices {
/// Describes all particle groups' slices of particles in the particle buffer
/// for a single effect, as well as the [`DispatchBufferIndices`].
pub struct SlicesRef {
ranges: Vec<u32>,
pub ranges: Vec<u32>,
/// Size of a single item in the slice. Currently equal to the unique size
/// of all items in an [`EffectBuffer`] (no mixed size supported in same
/// buffer), so cached only for convenience.
particle_layout: ParticleLayout,
dispatch_buffer_indices: DispatchBufferIndices,
pub dispatch_buffer_indices: DispatchBufferIndices,
}

/// A reference to a slice allocated inside an [`EffectBuffer`].
Expand Down Expand Up @@ -239,7 +239,12 @@ impl EffectBuffer {

// TODO - Cache particle_layout and associated bind group layout, instead of
// creating one bind group layout per buffer using that layout...
let particle_group_size = NonZeroU64::new(GpuParticleGroup::aligned_size(
render_device.limits().min_storage_buffer_offset_alignment as usize,
) as u64)
.unwrap();
let mut entries = vec![
// @binding(0) var<storage, read_write> particle_buffer : ParticleBuffer
BindGroupLayoutEntry {
binding: 0,
visibility: ShaderStages::COMPUTE,
Expand All @@ -250,6 +255,7 @@ impl EffectBuffer {
},
count: None,
},
// @binding(1) var<storage, read_write> indirect_buffer : IndirectBuffer
BindGroupLayoutEntry {
binding: 1,
visibility: ShaderStages::COMPUTE,
Expand All @@ -260,13 +266,16 @@ impl EffectBuffer {
},
count: None,
},
// @binding(2) var<storage, read> particle_groups : array<ParticleGroup>
BindGroupLayoutEntry {
binding: 2,
visibility: ShaderStages::COMPUTE,
ty: BindingType::Buffer {
ty: BufferBindingType::Storage { read_only: true },
has_dynamic_offset: false,
min_binding_size: Some(GpuParticleGroup::min_size()),
// Despite no dynamic offset, we do bind a non-zero offset sometimes,
// so keep this aligned
min_binding_size: Some(particle_group_size),
},
count: None,
},
Expand All @@ -292,6 +301,10 @@ impl EffectBuffer {
let particles_buffer_layout_sim = render_device.create_bind_group_layout(label, &entries);

// Create the render layout.
let dispatch_indirect_size = NonZeroU64::new(GpuDispatchIndirect::aligned_size(
render_device.limits().min_storage_buffer_offset_alignment as usize,
) as u64)
.unwrap();
let mut entries = vec![
BindGroupLayoutEntry {
binding: 0,
Expand Down Expand Up @@ -319,7 +332,7 @@ impl EffectBuffer {
ty: BindingType::Buffer {
ty: BufferBindingType::Storage { read_only: true },
has_dynamic_offset: true,
min_binding_size: Some(GpuDispatchIndirect::min_size()),
min_binding_size: Some(dispatch_indirect_size),
},
count: None,
},
Expand Down Expand Up @@ -785,10 +798,12 @@ impl EffectCache {
let id = EffectCacheId::new();

let mut ranges = vec![slice.range.start];
let group_count = capacities.len();
for capacity in capacities {
let start_index = ranges.last().unwrap();
ranges.push(start_index + capacity);
}
debug_assert_eq!(ranges.len(), group_count + 1);

let slices = SlicesRef {
ranges,
Expand Down Expand Up @@ -858,20 +873,22 @@ impl EffectCache {

/// Remove an effect from the cache. If this was the last effect, drop the
/// underlying buffer and return the index of the dropped buffer.
pub fn remove(&mut self, id: EffectCacheId) -> Option<u32> {
pub fn remove(&mut self, id: EffectCacheId) -> Option<CachedEffectIndices> {
let indices = self.effects.remove(&id)?;
let &mut Some(ref mut buffer) = &mut self.buffers[indices.buffer_index as usize] else {
return None;
};

let slice = SliceRef {
range: indices.slices.ranges[0]..*indices.slices.ranges.last().unwrap(),
particle_layout: indices.slices.particle_layout,
// FIXME: clone() needed to return CachedEffectIndices, but really we don't care about
// returning the ParticleLayout, so should split...
particle_layout: indices.slices.particle_layout.clone(),
};

if buffer.free_slice(slice) == BufferState::Free {
self.buffers[indices.buffer_index as usize] = None;
return Some(indices.buffer_index);
return Some(indices);
}

None
Expand Down Expand Up @@ -1166,9 +1183,8 @@ mod gpu_tests {
assert_eq!(slice2.slices, vec![0, capacity]);
assert_eq!(effect_cache.buffers().len(), 2);

let buffer_index = effect_cache.remove(id1);
assert!(buffer_index.is_some());
assert_eq!(buffer_index.unwrap(), 0);
let cached_effect_indices = effect_cache.remove(id1).unwrap();
assert_eq!(cached_effect_indices.buffer_index, 0);
assert_eq!(effect_cache.buffers().len(), 2);
{
let buffers = effect_cache.buffers();
Expand Down
Loading

0 comments on commit eb669f5

Please sign in to comment.