Move rand functions to vfx_common (#265)

Move all shader functions related to random number generation to the shared `vfx_common` module, to avoid code duplication.
djeedai · Jan 13, 2024 · 712e5d8 · 712e5d8
1 parent 5a3685d
commit 712e5d8
Show file tree

Hide file tree

Showing 6 changed files with 94 additions and 210 deletions.
diff --git a/src/render/mod.rs b/src/render/mod.rs
@@ -61,7 +61,7 @@ use aligned_buffer_vec::AlignedBufferVec;
 use buffer_table::{BufferTable, BufferTableId};
 pub(crate) use effect_cache::{EffectCache, EffectCacheId};
 
-pub use effect_cache::{EffectBuffer, EffectSlice};
+pub use effect_cache::EffectSlice;
 pub use shader_cache::ShaderCache;
 
 /// Labels for the Hanabi systems.
@@ -2001,6 +2001,9 @@ pub(crate) fn prepare_effects(
                     seed: random::<u32>(), /* FIXME - Probably bad to re-seed each time there's a
                                             * change */
                     count: 0,
+                    // FIXME: the effect_index is global inside the global spawner buffer,
+                    // but the group_index is the index of the particle buffer, which can
+                    // in theory (with batching) contain > 1 effect per buffer.
                     effect_index: input.effect_slice.group_index,
                     force_field: input.force_field.map(Into::into),
                 };
@@ -2977,6 +2980,7 @@ impl Node for VfxSimulateNode {
             .write_buffer(render_context.command_encoder());
 
         // Compute init pass
+        let mut num_batches = 0;
         {
             let mut compute_pass =
                 render_context
@@ -2990,6 +2994,8 @@ impl Node for VfxSimulateNode {
 
                 // Dispatch init compute jobs
                 for batch in self.effect_query.iter_manual(world) {
+                    num_batches += 1;
+
                     if let Some(init_pipeline) =
                         pipeline_cache.get_compute_pipeline(batch.init_pipeline_id)
                     {
@@ -3094,8 +3100,6 @@ impl Node for VfxSimulateNode {
             if let Some(indirect_dispatch_pipeline) = &effects_meta.indirect_dispatch_pipeline {
                 trace!("record commands for indirect dispatch pipeline...");
 
-                let num_batches = self.effect_query.iter_manual(world).count() as u32;
-
                 const WORKGROUP_SIZE: u32 = 64;
                 let workgroup_count = (num_batches + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE;
 

diff --git a/src/render/vfx_common.wgsl b/src/render/vfx_common.wgsl
@@ -118,3 +118,72 @@ struct RenderIndirect {
     /// dies during the update pass.
     max_update: u32,
 }
+
+var<private> seed : u32 = 0u;
+
+const tau: f32 = 6.283185307179586476925286766559;
+
+// Rand: PCG
+// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
+fn pcg_hash(input: u32) -> u32 {
+    var state: u32 = input * 747796405u + 2891336453u;
+    var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
+    return (word >> 22u) ^ word;
+}
+
+fn to_float01(u: u32) -> f32 {
+    // Note: only the low 23 bits of u are kept (f32 mantissa); the upper 9 bits are discarded.
+    return bitcast<f32>((u & 0x007fffffu) | 0x3f800000u) - 1.;
+}
+
+// Random floating-point number in [0:1) (1.0 itself is never returned)
+fn frand() -> f32 {
+    seed = pcg_hash(seed);
+    return to_float01(pcg_hash(seed));
+}
+
+// Random floating-point vector in [0:1)^2
+fn frand2() -> vec2<f32> {
+    seed = pcg_hash(seed);
+    var x = to_float01(seed);
+    seed = pcg_hash(seed);
+    var y = to_float01(seed);
+    return vec2<f32>(x, y);
+}
+
+// Random floating-point vector in [0:1)^3
+fn frand3() -> vec3<f32> {
+    seed = pcg_hash(seed);
+    var x = to_float01(seed);
+    seed = pcg_hash(seed);
+    var y = to_float01(seed);
+    seed = pcg_hash(seed);
+    var z = to_float01(seed);
+    return vec3<f32>(x, y, z);
+}
+
+// Random floating-point vector in [0:1)^4
+fn frand4() -> vec4<f32> {
+    // Each pcg_hash() call produces 32 bits, and we pack 24 bits per component,
+    // so we can get away with only 3 calls for all 4 components.
+    var r0 = pcg_hash(seed);
+    var r1 = pcg_hash(r0);
+    var r2 = pcg_hash(r1);
+    seed = r2;
+    var x = to_float01(r0);
+    var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu);
+    var y = to_float01(r01);
+    var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu);
+    var z = to_float01(r12);
+    var r22 = r2 >> 8u;
+    var w = to_float01(r22);
+    return vec4<f32>(x, y, z, w);
+}
+
+fn rand_uniform(a: f32, b: f32) -> f32 {
+    return a + frand() * (b - a);
+}
+
+fn proj(u: vec3<f32>, v: vec3<f32>) -> vec3<f32> {
+    return dot(v, u) / dot(u,u) * u;
+}
diff --git a/src/render/vfx_indirect.wgsl b/src/render/vfx_indirect.wgsl
@@ -10,7 +10,7 @@ struct SpawnerBuffer {
 }
 
 @group(0) @binding(0) var<storage, read_write> render_indirect_buffer : array<u32>;
-@group(0) @binding(1) var<storage, read_write> dispatch_indirect : array<u32>;
+@group(0) @binding(1) var<storage, read_write> dispatch_indirect_buffer : array<u32>;
 @group(0) @binding(2) var<storage, read> spawner_buffer : SpawnerBuffer;
 @group(1) @binding(0) var<uniform> sim_params : SimParams;
 
@@ -40,7 +40,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
     // Calculate the number of thread groups to dispatch for the update pass, which is
     // the number of alive particles rounded up to 64 (workgroup_size).
     let alive_count = render_indirect_buffer[ri_base + RI_OFFSET_ALIVE_COUNT];
-    dispatch_indirect[di_base + DI_OFFSET_X] = (alive_count + 63u) / 64u;
+    dispatch_indirect_buffer[di_base + DI_OFFSET_X] = (alive_count + 63u) >> 6u;
 
     // Update max_update from current value of alive_count, so that the update pass
     // coming next can cap its threads to this value, while also atomically modifying
@@ -65,5 +65,5 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
 
     // Copy the new pong into the dispatch buffer, which will be used during rendering
     // to determine where to read particle indices.
-    dispatch_indirect[di_base + DI_OFFSET_PONG] = pong;
+    dispatch_indirect_buffer[di_base + DI_OFFSET_PONG] = pong;
 }
diff --git a/src/render/vfx_init.wgsl b/src/render/vfx_init.wgsl
@@ -1,4 +1,8 @@
-#import bevy_hanabi::vfx_common::{ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner}
+#import bevy_hanabi::vfx_common::{
+    ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner,
+    seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4,
+    rand_uniform, proj
+}
 
 struct Particle {
 {{ATTRIBUTES}}
@@ -17,75 +21,6 @@ struct ParticleBuffer {
 @group(2) @binding(0) var<storage, read_write> spawner : Spawner; // NOTE - same group as update
 @group(3) @binding(0) var<storage, read_write> render_indirect : RenderIndirect;
 
-var<private> seed : u32 = 0u;
-
-const tau: f32 = 6.283185307179586476925286766559;
-
-// Rand: PCG
-// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
-fn pcg_hash(input: u32) -> u32 {
-    var state: u32 = input * 747796405u + 2891336453u;
-    var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
-    return (word >> 22u) ^ word;
-}
-
-fn to_float01(u: u32) -> f32 {
-    // Note: could generate only 24 bits of randomness
-    return bitcast<f32>((u & 0x007fffffu) | 0x3f800000u) - 1.;
-}
-
-// Random floating-point number in [0:1]
-fn frand() -> f32 {
-    seed = pcg_hash(seed);
-    return to_float01(pcg_hash(seed));
-}
-
-// Random floating-point number in [0:1]^2
-fn frand2() -> vec2<f32> {
-    seed = pcg_hash(seed);
-    var x = to_float01(seed);
-    seed = pcg_hash(seed);
-    var y = to_float01(seed);
-    return vec2<f32>(x, y);
-}
-
-// Random floating-point number in [0:1]^3
-fn frand3() -> vec3<f32> {
-    seed = pcg_hash(seed);
-    var x = to_float01(seed);
-    seed = pcg_hash(seed);
-    var y = to_float01(seed);
-    seed = pcg_hash(seed);
-    var z = to_float01(seed);
-    return vec3<f32>(x, y, z);
-}
-
-// Random floating-point number in [0:1]^4
-fn frand4() -> vec4<f32> {
-    // Each rand() produces 32 bits, and we need 24 bits per component,
-    // so can get away with only 3 calls.
-    var r0 = pcg_hash(seed);
-    var r1 = pcg_hash(r0);
-    var r2 = pcg_hash(r1);
-    seed = r2;
-    var x = to_float01(r0);
-    var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu);
-    var y = to_float01(r01);
-    var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu);
-    var z = to_float01(r12);
-    var r22 = r2 >> 8u;
-    var w = to_float01(r22);
-    return vec4<f32>(x, y, z, w);
-}
-
-fn rand_uniform(a: f32, b: f32) -> f32 {
-    return a + frand() * (b - a);
-}
-
-fn proj(u: vec3<f32>, v: vec3<f32>) -> vec3<f32> {
-    return dot(v, u) / dot(u,u) * u;
-}
-
 {{INIT_EXTRA}}
 
 @compute @workgroup_size(64)

diff --git a/src/render/vfx_render.wgsl b/src/render/vfx_render.wgsl
@@ -1,5 +1,9 @@
 #import bevy_render::view::View
-#import bevy_hanabi::vfx_common::{DispatchIndirect, ForceFieldSource, IndirectBuffer, SimParams, Spawner}
+#import bevy_hanabi::vfx_common::{
+    DispatchIndirect, ForceFieldSource, IndirectBuffer, SimParams, Spawner,
+    seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4,
+    rand_uniform, proj
+}
 
 struct Particle {
 {{ATTRIBUTES}}
@@ -34,69 +38,6 @@ struct VertexOutput {
 // @group(3) @binding(1) var gradient_sampler: sampler;
 // #endif
 
-var<private> seed : u32 = 0u;
-
-// Rand: PCG
-// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
-fn pcg_hash(input: u32) -> u32 {
-    var state: u32 = input * 747796405u + 2891336453u;
-    var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
-    return (word >> 22u) ^ word;
-}
-
-fn to_float01(u: u32) -> f32 {
-    // Note: could generate only 24 bits of randomness
-    return bitcast<f32>((u & 0x007fffffu) | 0x3f800000u) - 1.;
-}
-
-// Random floating-point number in [0:1]
-fn frand() -> f32 {
-    seed = pcg_hash(seed);
-    return to_float01(pcg_hash(seed));
-}
-
-// Random floating-point number in [0:1]^2
-fn frand2() -> vec2<f32> {
-    seed = pcg_hash(seed);
-    var x = to_float01(seed);
-    seed = pcg_hash(seed);
-    var y = to_float01(seed);
-    return vec2<f32>(x, y);
-}
-
-// Random floating-point number in [0:1]^3
-fn frand3() -> vec3<f32> {
-    seed = pcg_hash(seed);
-    var x = to_float01(seed);
-    seed = pcg_hash(seed);
-    var y = to_float01(seed);
-    seed = pcg_hash(seed);
-    var z = to_float01(seed);
-    return vec3<f32>(x, y, z);
-}
-
-// Random floating-point number in [0:1]^4
-fn frand4() -> vec4<f32> {
-    // Each rand() produces 32 bits, and we need 24 bits per component,
-    // so can get away with only 3 calls.
-    var r0 = pcg_hash(seed);
-    var r1 = pcg_hash(r0);
-    var r2 = pcg_hash(r1);
-    seed = r2;
-    var x = to_float01(r0);
-    var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu);
-    var y = to_float01(r01);
-    var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu);
-    var z = to_float01(r12);
-    var r22 = r2 >> 8u;
-    var w = to_float01(r22);
-    return vec4<f32>(x, y, z, w);
-}
-
-fn rand_uniform(a: f32, b: f32) -> f32 {
-    return a + frand() * (b - a);
-}
-
 fn get_camera_position_effect_space() -> vec3<f32> {
     let view_pos = view.view[3].xyz;
 #ifdef LOCAL_SPACE_SIMULATION

diff --git a/src/render/vfx_update.wgsl b/src/render/vfx_update.wgsl
@@ -1,4 +1,8 @@
-#import bevy_hanabi::vfx_common::{ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner}
+#import bevy_hanabi::vfx_common::{
+    ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner,
+    seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4,
+    rand_uniform, proj
+}
 
 struct Particle {
 {{ATTRIBUTES}}
@@ -17,75 +21,6 @@ struct ParticleBuffer {
 @group(2) @binding(0) var<storage, read_write> spawner : Spawner; // NOTE - same group as init
 @group(3) @binding(0) var<storage, read_write> render_indirect : RenderIndirect;
 
-var<private> seed : u32 = 0u;
-
-const tau: f32 = 6.283185307179586476925286766559;
-
-// Rand: PCG
-// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
-fn pcg_hash(input: u32) -> u32 {
-    var state: u32 = input * 747796405u + 2891336453u;
-    var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
-    return (word >> 22u) ^ word;
-}
-
-fn to_float01(u: u32) -> f32 {
-    // Note: could generate only 24 bits of randomness
-    return bitcast<f32>((u & 0x007fffffu) | 0x3f800000u) - 1.;
-}
-
-// Random floating-point number in [0:1]
-fn frand() -> f32 {
-    seed = pcg_hash(seed);
-    return to_float01(pcg_hash(seed));
-}
-
-// Random floating-point number in [0:1]^2
-fn frand2() -> vec2<f32> {
-    seed = pcg_hash(seed);
-    var x = to_float01(seed);
-    seed = pcg_hash(seed);
-    var y = to_float01(seed);
-    return vec2<f32>(x, y);
-}
-
-// Random floating-point number in [0:1]^3
-fn frand3() -> vec3<f32> {
-    seed = pcg_hash(seed);
-    var x = to_float01(seed);
-    seed = pcg_hash(seed);
-    var y = to_float01(seed);
-    seed = pcg_hash(seed);
-    var z = to_float01(seed);
-    return vec3<f32>(x, y, z);
-}
-
-// Random floating-point number in [0:1]^4
-fn frand4() -> vec4<f32> {
-    // Each rand() produces 32 bits, and we need 24 bits per component,
-    // so can get away with only 3 calls.
-    var r0 = pcg_hash(seed);
-    var r1 = pcg_hash(r0);
-    var r2 = pcg_hash(r1);
-    seed = r2;
-    var x = to_float01(r0);
-    var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu);
-    var y = to_float01(r01);
-    var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu);
-    var z = to_float01(r12);
-    var r22 = r2 >> 8u;
-    var w = to_float01(r22);
-    return vec4<f32>(x, y, z, w);
-}
-
-fn rand_uniform(a: f32, b: f32) -> f32 {
-    return a + frand() * (b - a);
-}
-
-fn proj(u: vec3<f32>, v: vec3<f32>) -> vec3<f32> {
-    return dot(v, u) / dot(u,u) * u;
-}
-
 {{UPDATE_EXTRA}}
 
 @compute @workgroup_size(64)