diff --git a/src/render/mod.rs b/src/render/mod.rs index 70a8edb5..09d9881d 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -2001,6 +2001,9 @@ pub(crate) fn prepare_effects( seed: random::(), /* FIXME - Probably bad to re-seed each time there's a * change */ count: 0, + // FIXME: the effect_index is global inside the global spawner buffer, + // but the group_index is the index of the particle buffer, which can + // in theory (with batching) contain > 1 effect per buffer. effect_index: input.effect_slice.group_index, force_field: input.force_field.map(Into::into), }; @@ -2977,6 +2980,7 @@ impl Node for VfxSimulateNode { .write_buffer(render_context.command_encoder()); // Compute init pass + let mut num_batches = 0; { let mut compute_pass = render_context @@ -2990,6 +2994,8 @@ impl Node for VfxSimulateNode { // Dispatch init compute jobs for batch in self.effect_query.iter_manual(world) { + num_batches += 1; + if let Some(init_pipeline) = pipeline_cache.get_compute_pipeline(batch.init_pipeline_id) { @@ -3094,8 +3100,6 @@ impl Node for VfxSimulateNode { if let Some(indirect_dispatch_pipeline) = &effects_meta.indirect_dispatch_pipeline { trace!("record commands for indirect dispatch pipeline..."); - let num_batches = self.effect_query.iter_manual(world).count() as u32; - const WORKGROUP_SIZE: u32 = 64; let workgroup_count = (num_batches + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE; diff --git a/src/render/vfx_common.wgsl b/src/render/vfx_common.wgsl index 94c2196d..1bd8c425 100644 --- a/src/render/vfx_common.wgsl +++ b/src/render/vfx_common.wgsl @@ -118,3 +118,72 @@ struct RenderIndirect { /// dies during the update pass. max_update: u32, } + +var seed : u32 = 0u; + +const tau: f32 = 6.283185307179586476925286766559; + +// Rand: PCG +// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/ +fn pcg_hash(input: u32) -> u32 { + var state: u32 = input * 747796405u + 2891336453u; + var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +fn to_float01(u: u32) -> f32 { + // Note: could generate only 24 bits of randomness + return bitcast((u & 0x007fffffu) | 0x3f800000u) - 1.; +} + +// Random floating-point number in [0:1] +fn frand() -> f32 { + seed = pcg_hash(seed); + return to_float01(pcg_hash(seed)); +} + +// Random floating-point number in [0:1]^2 +fn frand2() -> vec2 { + seed = pcg_hash(seed); + var x = to_float01(seed); + seed = pcg_hash(seed); + var y = to_float01(seed); + return vec2(x, y); +} + +// Random floating-point number in [0:1]^3 +fn frand3() -> vec3 { + seed = pcg_hash(seed); + var x = to_float01(seed); + seed = pcg_hash(seed); + var y = to_float01(seed); + seed = pcg_hash(seed); + var z = to_float01(seed); + return vec3(x, y, z); +} + +// Random floating-point number in [0:1]^4 +fn frand4() -> vec4 { + // Each rand() produces 32 bits, and we need 24 bits per component, + // so can get away with only 3 calls. + var r0 = pcg_hash(seed); + var r1 = pcg_hash(r0); + var r2 = pcg_hash(r1); + seed = r2; + var x = to_float01(r0); + var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu); + var y = to_float01(r01); + var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu); + var z = to_float01(r12); + var r22 = r2 >> 8u; + var w = to_float01(r22); + return vec4(x, y, z, w); +} + +fn rand_uniform(a: f32, b: f32) -> f32 { + return a + frand() * (b - a); +} + +fn proj(u: vec3, v: vec3) -> vec3 { + return dot(v, u) / dot(u,u) * u; +} diff --git a/src/render/vfx_indirect.wgsl b/src/render/vfx_indirect.wgsl index cebb9173..3e2c5653 100644 --- a/src/render/vfx_indirect.wgsl +++ b/src/render/vfx_indirect.wgsl @@ -10,7 +10,7 @@ struct SpawnerBuffer { } @group(0) @binding(0) var render_indirect_buffer : array; -@group(0) @binding(1) var dispatch_indirect : array; +@group(0) @binding(1) var dispatch_indirect_buffer : array; @group(0) @binding(2) var spawner_buffer : SpawnerBuffer; @group(1) @binding(0) var sim_params : SimParams; @@ -40,7 +40,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { // Calculate the number of thread groups to dispatch for the update pass, which is // the number of alive particles rounded up to 64 (workgroup_size). let alive_count = render_indirect_buffer[ri_base + RI_OFFSET_ALIVE_COUNT]; - dispatch_indirect[di_base + DI_OFFSET_X] = (alive_count + 63u) / 64u; + dispatch_indirect_buffer[di_base + DI_OFFSET_X] = (alive_count + 63u) >> 6u; // Update max_update from current value of alive_count, so that the update pass // coming next can cap its threads to this value, while also atomically modifying @@ -65,5 +65,5 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { // Copy the new pong into the dispatch buffer, which will be used during rendering // to determine where to read particle indices. - dispatch_indirect[di_base + DI_OFFSET_PONG] = pong; + dispatch_indirect_buffer[di_base + DI_OFFSET_PONG] = pong; } diff --git a/src/render/vfx_init.wgsl b/src/render/vfx_init.wgsl index aaadcd57..043e6e6f 100644 --- a/src/render/vfx_init.wgsl +++ b/src/render/vfx_init.wgsl @@ -1,4 +1,8 @@ -#import bevy_hanabi::vfx_common::{ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner} +#import bevy_hanabi::vfx_common::{ + ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner, + seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4, + rand_uniform, proj +} struct Particle { {{ATTRIBUTES}} @@ -17,75 +21,6 @@ struct ParticleBuffer { @group(2) @binding(0) var spawner : Spawner; // NOTE - same group as update @group(3) @binding(0) var render_indirect : RenderIndirect; -var seed : u32 = 0u; - -const tau: f32 = 6.283185307179586476925286766559; - -// Rand: PCG -// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/ -fn pcg_hash(input: u32) -> u32 { - var state: u32 = input * 747796405u + 2891336453u; - var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; - return (word >> 22u) ^ word; -} - -fn to_float01(u: u32) -> f32 { - // Note: could generate only 24 bits of randomness - return bitcast((u & 0x007fffffu) | 0x3f800000u) - 1.; -} - -// Random floating-point number in [0:1] -fn frand() -> f32 { - seed = pcg_hash(seed); - return to_float01(pcg_hash(seed)); -} - -// Random floating-point number in [0:1]^2 -fn frand2() -> vec2 { - seed = pcg_hash(seed); - var x = to_float01(seed); - seed = pcg_hash(seed); - var y = to_float01(seed); - return vec2(x, y); -} - -// Random floating-point number in [0:1]^3 -fn frand3() -> vec3 { - seed = pcg_hash(seed); - var x = to_float01(seed); - seed = pcg_hash(seed); - var y = to_float01(seed); - seed = pcg_hash(seed); - var z = to_float01(seed); - return vec3(x, y, z); -} - -// Random floating-point number in [0:1]^4 -fn frand4() -> vec4 { - // Each rand() produces 32 bits, and we need 24 bits per component, - // so can get away with only 3 calls. - var r0 = pcg_hash(seed); - var r1 = pcg_hash(r0); - var r2 = pcg_hash(r1); - seed = r2; - var x = to_float01(r0); - var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu); - var y = to_float01(r01); - var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu); - var z = to_float01(r12); - var r22 = r2 >> 8u; - var w = to_float01(r22); - return vec4(x, y, z, w); -} - -fn rand_uniform(a: f32, b: f32) -> f32 { - return a + frand() * (b - a); -} - -fn proj(u: vec3, v: vec3) -> vec3 { - return dot(v, u) / dot(u,u) * u; -} - {{INIT_EXTRA}} @compute @workgroup_size(64) diff --git a/src/render/vfx_render.wgsl b/src/render/vfx_render.wgsl index 795adf5c..cc5a6504 100644 --- a/src/render/vfx_render.wgsl +++ b/src/render/vfx_render.wgsl @@ -1,5 +1,9 @@ #import bevy_render::view::View -#import bevy_hanabi::vfx_common::{DispatchIndirect, ForceFieldSource, IndirectBuffer, SimParams, Spawner} +#import bevy_hanabi::vfx_common::{ + DispatchIndirect, ForceFieldSource, IndirectBuffer, SimParams, Spawner, + seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4, + rand_uniform, proj +} struct Particle { {{ATTRIBUTES}} @@ -34,69 +38,6 @@ struct VertexOutput { // @group(3) @binding(1) var gradient_sampler: sampler; // #endif -var seed : u32 = 0u; - -// Rand: PCG -// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/ -fn pcg_hash(input: u32) -> u32 { - var state: u32 = input * 747796405u + 2891336453u; - var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; - return (word >> 22u) ^ word; -} - -fn to_float01(u: u32) -> f32 { - // Note: could generate only 24 bits of randomness - return bitcast((u & 0x007fffffu) | 0x3f800000u) - 1.; -} - -// Random floating-point number in [0:1] -fn frand() -> f32 { - seed = pcg_hash(seed); - return to_float01(pcg_hash(seed)); -} - -// Random floating-point number in [0:1]^2 -fn frand2() -> vec2 { - seed = pcg_hash(seed); - var x = to_float01(seed); - seed = pcg_hash(seed); - var y = to_float01(seed); - return vec2(x, y); -} - -// Random floating-point number in [0:1]^3 -fn frand3() -> vec3 { - seed = pcg_hash(seed); - var x = to_float01(seed); - seed = pcg_hash(seed); - var y = to_float01(seed); - seed = pcg_hash(seed); - var z = to_float01(seed); - return vec3(x, y, z); -} - -// Random floating-point number in [0:1]^4 -fn frand4() -> vec4 { - // Each rand() produces 32 bits, and we need 24 bits per component, - // so can get away with only 3 calls. - var r0 = pcg_hash(seed); - var r1 = pcg_hash(r0); - var r2 = pcg_hash(r1); - seed = r2; - var x = to_float01(r0); - var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu); - var y = to_float01(r01); - var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu); - var z = to_float01(r12); - var r22 = r2 >> 8u; - var w = to_float01(r22); - return vec4(x, y, z, w); -} - -fn rand_uniform(a: f32, b: f32) -> f32 { - return a + frand() * (b - a); -} - fn get_camera_position_effect_space() -> vec3 { let view_pos = view.view[3].xyz; #ifdef LOCAL_SPACE_SIMULATION diff --git a/src/render/vfx_update.wgsl b/src/render/vfx_update.wgsl index 33fd9d3a..516d1742 100644 --- a/src/render/vfx_update.wgsl +++ b/src/render/vfx_update.wgsl @@ -1,4 +1,8 @@ -#import bevy_hanabi::vfx_common::{ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner} +#import bevy_hanabi::vfx_common::{ + ForceFieldSource, IndirectBuffer, RenderIndirect, SimParams, Spawner, + seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4, + rand_uniform, proj +} struct Particle { {{ATTRIBUTES}} @@ -17,75 +21,6 @@ struct ParticleBuffer { @group(2) @binding(0) var spawner : Spawner; // NOTE - same group as init @group(3) @binding(0) var render_indirect : RenderIndirect; -var seed : u32 = 0u; - -const tau: f32 = 6.283185307179586476925286766559; - -// Rand: PCG -// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/ -fn pcg_hash(input: u32) -> u32 { - var state: u32 = input * 747796405u + 2891336453u; - var word: u32 = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; - return (word >> 22u) ^ word; -} - -fn to_float01(u: u32) -> f32 { - // Note: could generate only 24 bits of randomness - return bitcast((u & 0x007fffffu) | 0x3f800000u) - 1.; -} - -// Random floating-point number in [0:1] -fn frand() -> f32 { - seed = pcg_hash(seed); - return to_float01(pcg_hash(seed)); -} - -// Random floating-point number in [0:1]^2 -fn frand2() -> vec2 { - seed = pcg_hash(seed); - var x = to_float01(seed); - seed = pcg_hash(seed); - var y = to_float01(seed); - return vec2(x, y); -} - -// Random floating-point number in [0:1]^3 -fn frand3() -> vec3 { - seed = pcg_hash(seed); - var x = to_float01(seed); - seed = pcg_hash(seed); - var y = to_float01(seed); - seed = pcg_hash(seed); - var z = to_float01(seed); - return vec3(x, y, z); -} - -// Random floating-point number in [0:1]^4 -fn frand4() -> vec4 { - // Each rand() produces 32 bits, and we need 24 bits per component, - // so can get away with only 3 calls. - var r0 = pcg_hash(seed); - var r1 = pcg_hash(r0); - var r2 = pcg_hash(r1); - seed = r2; - var x = to_float01(r0); - var r01 = (r0 & 0xff000000u) >> 8u | (r1 & 0x0000ffffu); - var y = to_float01(r01); - var r12 = (r1 & 0xffff0000u) >> 8u | (r2 & 0x000000ffu); - var z = to_float01(r12); - var r22 = r2 >> 8u; - var w = to_float01(r22); - return vec4(x, y, z, w); -} - -fn rand_uniform(a: f32, b: f32) -> f32 { - return a + frand() * (b - a); -} - -fn proj(u: vec3, v: vec3) -> vec3 { - return dot(v, u) / dot(u,u) * u; -} - {{UPDATE_EXTRA}} @compute @workgroup_size(64)