diff --git a/CHANGELOG.md b/CHANGELOG.md index cf6ca9b0..bd662416 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added a new `ScreenSpaceSizeModifier` which negates the effect of perspective projection, and makes the particle's size a pixel size in screen space, instead of a Bevy world unit size. This replaces the hard-coded behavior previously available on the `SetSizeModifier`. - Added a new `ConformToSphereModifier` acting as an attractor applying a force toward a point (sphere center) to all particles in range, and making particles conform ("stick") to the sphere surface. +- Added `vec2` and `vec3` functions that allow construction of vectors from dynamic parts. +- Added basic support for particle trails. To use them, replace calls to `EffectAsset::new()` with `EffectAsset::with_trails()`, and call `with_trail_length()` and optionally `with_trail_period()` on the `Spawner`. ### Changed diff --git a/Cargo.toml b/Cargo.toml index a0c1ab34..dcfb48b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -144,6 +144,10 @@ required-features = [ "bevy/bevy_winit", "bevy/bevy_pbr", "bevy/png", "3d" ] name = "2d" required-features = [ "bevy/bevy_winit", "bevy/bevy_sprite", "2d" ] +[[example]] +name = "worms" +required-features = [ "bevy/bevy_winit", "bevy/bevy_pbr", "3d" ] + [workspace] resolver = "2" members = ["."] diff --git a/assets/circle.png b/assets/circle.png new file mode 100644 index 00000000..11c22664 Binary files /dev/null and b/assets/circle.png differ diff --git a/examples/worms.rs b/examples/worms.rs new file mode 100644 index 00000000..a6ad86f0 --- /dev/null +++ b/examples/worms.rs @@ -0,0 +1,156 @@ +//! Worms +//! +//! Demonstrates simple use of particle trails. 
+ +use std::f32::consts::{FRAC_PI_2, PI}; + +use bevy::{ + core_pipeline::{bloom::BloomSettings, tonemapping::Tonemapping}, + log::LogPlugin, + math::{vec3, vec4}, + prelude::*, +}; +#[cfg(feature = "examples_world_inspector")] +use bevy_inspector_egui::quick::WorldInspectorPlugin; + +use bevy_hanabi::prelude::*; + +fn main() { + let mut app = App::default(); + app.add_plugins( + DefaultPlugins + .set(LogPlugin { + level: bevy::log::Level::WARN, + filter: "bevy_hanabi=warn,worms=trace".to_string(), + update_subscriber: None, + }) + .set(WindowPlugin { + primary_window: Some(Window { + title: "🎆 Hanabi — worms".to_string(), + ..default() + }), + ..default() + }), + ) + .add_systems(Update, bevy::window::close_on_esc) + .add_plugins(HanabiPlugin); + + #[cfg(feature = "examples_world_inspector")] + app.add_plugins(WorldInspectorPlugin::default()); + + app.add_systems(Startup, setup).run(); +} + +fn setup( + mut commands: Commands, + asset_server: ResMut, + mut effects: ResMut>, +) { + commands.spawn(( + Camera3dBundle { + transform: Transform::from_translation(Vec3::new(0., 0., 25.)), + camera: Camera { + hdr: true, + clear_color: Color::BLACK.into(), + ..default() + }, + tonemapping: Tonemapping::None, + ..default() + }, + BloomSettings::default(), + )); + + let circle: Handle = asset_server.load("circle.png"); + + let writer = ExprWriter::new(); + + // Init modifiers + + // Spawn the particles within a reasonably large box. + let set_initial_position_modifier = SetAttributeModifier::new( + Attribute::POSITION, + ((writer.rand(ValueType::Vector(VectorType::VEC3F)) + writer.lit(vec3(-0.5, -0.5, 0.0))) + * writer.lit(vec3(16.0, 16.0, 0.0))) + .expr(), + ); + + // Randomize the initial angle of the particle, storing it in the `F32_0` + // scratch attribute.` + let set_initial_angle_modifier = SetAttributeModifier::new( + Attribute::F32_0, + writer.lit(0.0).uniform(writer.lit(PI * 2.0)).expr(), + ); + + // Give each particle a random opaque color. 
+ let set_color_modifier = SetAttributeModifier::new( + Attribute::COLOR, + (writer.rand(ValueType::Vector(VectorType::VEC4F)) * writer.lit(vec4(1.0, 1.0, 1.0, 0.0)) + + writer.lit(Vec4::W)) + .pack4x8unorm() + .expr(), + ); + + // Give the particles a long lifetime. + let set_lifetime_modifier = + SetAttributeModifier::new(Attribute::LIFETIME, writer.lit(10.0).expr()); + + // Update modifiers + + // Make the particle wiggle, following a sine wave. + let set_velocity_modifier = SetAttributeModifier::new( + Attribute::VELOCITY, + WriterExpr::sin( + writer.lit(vec3(1.0, 1.0, 0.0)) + * (writer.attr(Attribute::F32_0) + + (writer.time() * writer.lit(5.0)).sin() * writer.lit(1.0)) + + writer.lit(vec3(0.0, FRAC_PI_2, 0.0)), + ) + .mul(writer.lit(5.0)) + .expr(), + ); + + // Render modifiers + + // Set the particle size. + let set_size_modifier = SetSizeModifier { + size: Vec2::splat(0.4).into(), + }; + + // Make each particle round. + let particle_texture_modifier = ParticleTextureModifier { + texture: circle, + sample_mapping: ImageSampleMapping::Modulate, + }; + + let module = writer.finish(); + + // Allocate room for 32,768 trail particles. Give each particle a 5-particle + // trail, and spawn a new trail particle every ⅛ of a second. 
+ let effect = effects.add( + EffectAsset::with_trails( + 32768, + 32768, + Spawner::rate(4.0.into()) + .with_trail_length(5) + .with_trail_period(0.125.into()), + module, + ) + .with_name("worms") + .init(set_initial_position_modifier) + .init(set_initial_angle_modifier) + .init(set_lifetime_modifier) + .init(set_color_modifier) + .update(set_velocity_modifier) + .render(set_size_modifier) + .render(particle_texture_modifier), + ); + + commands.spawn(( + Name::new("worms"), + ParticleEffectBundle { + effect: ParticleEffect::new(effect), + transform: Transform::IDENTITY, + ..default() + }, + )); +} diff --git a/src/asset.rs b/src/asset.rs index f7a3e0bb..b5ad3f39 100644 --- a/src/asset.rs +++ b/src/asset.rs @@ -186,6 +186,11 @@ pub struct EffectAsset { /// should keep this quantity as close as possible to the maximum number of /// particles they expect to render. capacity: u32, + /// Maximum number of concurrent trail particles. + /// + /// The same caveats as [`capacity`] apply. This value can't be changed + /// after the effect is created. + trail_capacity: u32, /// Spawner. pub spawner: Spawner, /// For 2D rendering, the Z coordinate used as the sort key. @@ -250,6 +255,9 @@ impl EffectAsset { /// which should be passed to this method. If expressions are not used, just /// pass an empty module [`Module::default()`]. /// + /// This function doesn't allocate space for any trails. If you need + /// particle trails, use [`with_trails`] instead. + /// /// # Examples /// /// Create a new effect asset without any modifier. This effect doesn't @@ -290,12 +298,30 @@ impl EffectAsset { } } + /// As [`new`], but reserves space for trails. + /// + /// Use this method when you want to enable particle trails. + pub fn with_trails( + capacity: u32, + trail_capacity: u32, + spawner: Spawner, + module: Module, + ) -> Self { + Self { + capacity, + trail_capacity, + spawner, + module, + ..default() + } + } + /// Get the capacity of the effect, in number of particles. 
/// /// This represents the number of particles stored in GPU memory at all /// time, even if unused, so you should try to minimize this value. However, /// the [`Spawner`] cannot emit more particles than this capacity. Whatever - /// the spanwer settings, if the number of particles reaches the capacity, + /// the spawner settings, if the number of particles reaches the capacity, /// no new particle can be emitted. Setting an appropriate capacity for an /// effect is therefore a compromise between more particles available for /// visuals and more GPU memory usage. @@ -310,6 +336,15 @@ impl EffectAsset { self.capacity } + /// Get the trail capacity of the effect, in number of trail particles. + /// + /// The same caveats as [`capacity`] apply here: the GPU always allocates + /// space for this many trail particles, regardless of the number actually + /// used. + pub fn trail_capacity(&self) -> u32 { + self.trail_capacity + } + /// Get the expression module storing all expressions in use by modifiers of /// this effect. pub fn module(&self) -> &Module { diff --git a/src/gradient.rs b/src/gradient.rs index 5e85b647..d1a32efc 100644 --- a/src/gradient.rs +++ b/src/gradient.rs @@ -4,10 +4,7 @@ use bevy::{ utils::FloatOrd, }; use serde::{Deserialize, Serialize}; -use std::{ - hash::{Hash, Hasher}, - vec::Vec, -}; +use std::hash::{Hash, Hasher}; /// Describes a type that can be linearly interpolated between two keys. 
/// diff --git a/src/lib.rs b/src/lib.rs index 417e995a..b9676297 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -196,8 +196,6 @@ mod spawn; #[cfg(test)] mod test_utils; -use properties::PropertyInstance; - pub use asset::{AlphaMode, EffectAsset, MotionIntegration, SimulationCondition}; pub use attributes::*; pub use bundle::ParticleEffectBundle; @@ -840,6 +838,19 @@ impl EffectShaderSource { "@group(1) @binding(2) var properties : Properties;".to_string() }; + let (trail_binding_code, trail_render_indirect_binding_code) = if asset.trail_capacity() + == 0 + { + ("// (no trails)".to_string(), "// (no trails)".to_string()) + } else { + ( + "@group(1) @binding(3) var trail_buffer : ParticleBuffer;" + .to_string(), + "@group(3) @binding(1) var trail_render_indirect : TrailRenderIndirect;" + .to_string(), + ) + }; + // Start from the base module containing the expressions actually serialized in // the asset. We will add the ones created on-the-fly by applying the // modifiers to the contexts. @@ -968,6 +979,10 @@ impl EffectShaderSource { (String::new(), String::new()) }; + if asset.trail_capacity() > 0 { + layout_flags |= LayoutFlags::TRAILS_BUFFER_PRESENT; + } + ( render_context.vertex_code, render_context.fragment_code, @@ -1040,7 +1055,12 @@ impl EffectShaderSource { .replace("{{UPDATE_CODE}}", &update_code) .replace("{{UPDATE_EXTRA}}", &update_extra) .replace("{{PROPERTIES}}", &properties_code) - .replace("{{PROPERTIES_BINDING}}", &properties_binding_code); + .replace("{{PROPERTIES_BINDING}}", &properties_binding_code) + .replace("{{TRAIL_BINDING}}", &trail_binding_code) + .replace( + "{{TRAIL_RENDER_INDIRECT_BINDING}}", + &trail_render_indirect_binding_code, + ); trace!("Configured update shader:\n{}", update_shader_source); // Configure the render shader template, and make sure a corresponding shader diff --git a/src/render/batch.rs b/src/render/batch.rs index 259665d2..ac5de42d 100644 --- a/src/render/batch.rs +++ b/src/render/batch.rs @@ -49,6 +49,9 @@ pub(crate) 
struct EffectBatch { /// /// [`ParticleEffect`]: crate::ParticleEffect pub entities: Vec, + /// Whether trails are active for this effect (present with a nonzero + /// length). + pub trails_active: bool, } impl EffectBatch { @@ -74,6 +77,7 @@ impl EffectBatch { #[cfg(feature = "2d")] z_sort_key_2d: input.z_sort_key_2d, entities: vec![input.entity_index], + trails_active: input.trail_capacity > 0 && input.trail_length > 0, } } } @@ -98,6 +102,11 @@ pub(crate) struct BatchInput { pub image_handle: Handle, /// Number of particles to spawn for this effect. pub spawn_count: u32, + pub spawn_trail_particle: bool, + pub trail_length: u32, + pub trail_capacity: u32, + pub trail_head_chunk: u32, + pub trail_tail_chunk: u32, /// Emitter transform. pub transform: GpuCompressedTransform, /// Emitter inverse transform. @@ -295,8 +304,6 @@ impl<'a, S, B, I: Batchable> Batcher<'a, S, B, I> { #[cfg(test)] mod tests { - use crate::EffectShader; - use super::*; // Test item to batch @@ -550,6 +557,11 @@ mod tests { layout_flags: LayoutFlags::NONE, image_handle, spawn_count: 32, + spawn_trail_particle: false, + trail_length: 0, + trail_capacity: 0, + trail_head_chunk: 0, + trail_tail_chunk: 0, transform: GpuCompressedTransform::default(), inverse_transform: GpuCompressedTransform::default(), property_buffer: None, diff --git a/src/render/effect_cache.rs b/src/render/effect_cache.rs index 9573d491..723e24e1 100644 --- a/src/render/effect_cache.rs +++ b/src/render/effect_cache.rs @@ -17,11 +17,15 @@ use std::{ use crate::{ asset::EffectAsset, - render::GpuDispatchIndirect, - render::{GpuSpawnerParams, LayoutFlags}, + render::{ + particle_bind_group_layout_entries, GpuDispatchIndirect, GpuRenderIndirect, + GpuSpawnerParams, GpuTrailRenderIndirect, LayoutFlags, + }, ParticleLayout, PropertyLayout, }; +use super::buffer_table::BufferTableId; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct EffectSlice { /// Slice into the underlying BufferVec of the group. 
@@ -100,18 +104,23 @@ pub struct EffectBuffer { /// GPU buffer holding the properties of the effect(s), if any. This is /// always `None` if the property layout is empty. properties_buffer: Option, + /// GPU ring buffer holding trail particles, if any. + trail_buffer: Option, /// Layout of particles. particle_layout: ParticleLayout, /// Layout of properties of the effect(s), if using properties. property_layout: PropertyLayout, + trail_chunks: TrailChunks, /// Flags layout_flags: LayoutFlags, /// - particles_buffer_layout_simulate: BindGroupLayout, /// - particles_buffer_layout_with_dispatch: BindGroupLayout, + update_render_indirect_layout: BindGroupLayout, /// Total buffer capacity, in number of particles. capacity: u32, + trail_capacity: u32, /// Used buffer size, in number of particles, either from allocated slices /// or from slices in the free list. used_size: u32, @@ -126,6 +135,71 @@ pub struct EffectBuffer { asset: Handle, } +/// The CPU-side list of trail chunks. +/// +/// A *chunk* is a group of trail particles that are all spawned at the same +/// time. Chunking is useful because it allows trail particles to be despawned +/// all at once instead of having to despawn them individually. The index of the +/// first particle within each chunk is stored on GPU within the *chunk buffer*. +/// In turn, the index of the slot within the chunk buffer of each such particle +/// index is stored here, on CPU. 
See the diagram: +/// +/// ```text +/// ┌───┬───┬───┬───┬───┐ +/// Chunks (CPU) │ │ │ │ │ │ +/// └─┬─┴─┬─┴─┬─┴─┬─┴─┬─┘ +/// │ │ │ │ │ +/// ┌───┘ ┌─┘ │ └─┐ └───┐ +/// │ │ │ │ │ +/// ┌─▼─┐ ┌─▼─┐ ┌─▼─┐ ┌─▼─┐ ┌─▼─┐ +/// Chunk Buffer (GPU) │ │ │ │ │ │ │ │ │ │ +/// └─┬─┘ └─┬─┘ └─┬─┘ └─┬─┘ └─┬─┘ +/// │ │ │ │ │ +/// │ ┌┘ ┌─┘ ┌──┘ ┌───┘ +/// ▼ ▼ ▼ ▼ ▼ +/// ──┬────┬────┬────┬────┬────┬─── +/// Trail Particles (GPU) │ │ │ │ │ │➜ +/// ──┴────┴────┴────┴────┴────┴─── +/// ▲ +/// │ +/// Base Instance +/// +/// └────────────────────────┘ +/// Instance Count +/// ``` +/// +/// A reasonable question at this point is "why not store a +/// contiguously-allocated ring buffer on GPU instead of storing the buffer on +/// CPU and using indirection?" To answer this, we start by making two +/// observations: +/// +/// 1. We need to access the chunk positions during the invocation of +/// `vfx_indirect.wgsl`, which is dispatched only once and is expected to handle +/// every particle effect in the scene. This means that the all the chunk +/// positions for every particle system need to be stored in the same GPU +/// buffer. At the same time, particle effects can be spawned and despawned +/// arbitrarily. If we wanted to store them contiguously, then we would need a +/// dynamic memory allocator capable of handling blocks of arbitrary size, which +/// would be complicated and would lead to fragmentation. +/// +/// 2. The GPU doesn't actually need to consult the entire chunk buffer during a +/// single frame. It only needs to know the index of the oldest alive trail +/// particle and the index of the most recent trail particle. This is because +/// the trail particle ring buffer is naturally sorted from the oldest particle +/// to the newest. +/// +/// By allocating each slot in the chunk value separately, and not necessarily +/// contiguously, we can use a simple free list (reusing the [`BufferTable`] +/// infrastructure) instead of a full-blown memory allocator, solving point (1). 
+/// And, because of point (2), the fact that the chunk slots aren't contiguous +/// isn't a problem, because the GPU only needs to look at the head and the tail +/// on each frame. +#[derive(Default, Debug)] +pub(crate) struct TrailChunks { + /// Indices of the chunk pointers within the GPU chunk buffer. + pub(crate) chunk_ids: Vec, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BufferState { Used, @@ -149,8 +223,10 @@ impl EffectBuffer { pub fn new( asset: Handle, capacity: u32, + trail_capacity: u32, particle_layout: ParticleLayout, property_layout: PropertyLayout, + trail_chunks: TrailChunks, layout_flags: LayoutFlags, // compute_pipeline: ComputePipeline, render_device: &RenderDevice, @@ -183,6 +259,20 @@ impl EffectBuffer { let capacity_bytes: BufferAddress = capacity as u64 * 4; + // Create the trail buffer. + let trail_bytes: BufferAddress = + trail_capacity as u64 * particle_layout.min_binding_size().get(); + let trail_buffer = if trail_bytes > 0 { + Some(render_device.create_buffer(&BufferDescriptor { + label, + size: trail_bytes, + usage: BufferUsages::STORAGE, + mapped_at_creation: false, + })) + } else { + None + }; + let indirect_label = if let Some(label) = label { format!("{label}_indirect") } else { @@ -229,40 +319,16 @@ impl EffectBuffer { // TODO - Cache particle_layout and associated bind group layout, instead of // creating one bind group layout per buffer using that layout... 
- let mut entries = vec![ - BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: true, - min_binding_size: Some(particle_layout.min_binding_size()), - }, - count: None, - }, - BindGroupLayoutEntry { - binding: 1, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: true, - min_binding_size: BufferSize::new(12), - }, - count: None, - }, - ]; - if !property_layout.is_empty() { - entries.push(BindGroupLayoutEntry { - binding: 2, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: true }, - has_dynamic_offset: false, // TODO - min_binding_size: Some(property_layout.min_binding_size()), - }, - count: None, - }); - } + let property_layout_min_binding_size = if property_layout.is_empty() { + None + } else { + Some(property_layout.min_binding_size()) + }; + let entries = particle_bind_group_layout_entries( + particle_layout.min_binding_size(), + property_layout_min_binding_size, + trail_buffer.is_some(), + ); let label = "hanabi:simualate_particles_buffer_layout"; trace!( "Creating particle bind group layout '{}' for simulate passes (init & update) with {} entries.", @@ -324,16 +390,45 @@ impl EffectBuffer { let particles_buffer_layout_with_dispatch = render_device.create_bind_group_layout("hanabi:buffer_layout_render", &entries); + let mut entries = vec![BindGroupLayoutEntry { + binding: 0, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: Some(GpuRenderIndirect::min_size()), + }, + count: None, + }]; + if layout_flags.contains(LayoutFlags::TRAILS_BUFFER_PRESENT) { + entries.push(BindGroupLayoutEntry { + binding: 1, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: 
BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: Some(GpuTrailRenderIndirect::min_size()), + }, + count: None, + }); + } + let update_render_indirect_layout = render_device + .create_bind_group_layout("hanabi:update_render_indirect_layout", &entries); + Self { particle_buffer, + trail_buffer, indirect_buffer, properties_buffer, particle_layout, property_layout, + trail_chunks, layout_flags, particles_buffer_layout_simulate, particles_buffer_layout_with_dispatch, + update_render_indirect_layout, capacity, + trail_capacity, used_size: 0, free_slices: vec![], // compute_pipeline, @@ -365,6 +460,10 @@ impl EffectBuffer { &self.particles_buffer_layout_with_dispatch } + pub fn update_render_indirect_layout_bind_group(&self) -> &BindGroupLayout { + &self.update_render_indirect_layout + } + /// Return a binding for the entire particle buffer. pub fn max_binding(&self) -> BindingResource { let capacity_bytes = self.capacity as u64 * self.particle_layout.min_binding_size().get(); @@ -386,6 +485,20 @@ impl EffectBuffer { }) } + /// Return a binding for the entire trail buffer associated with the + /// current effect buffer. + pub fn trail_max_binding(&self) -> Option { + self.trail_buffer.as_ref().map(|trail_buffer| { + let capacity_bytes = + self.trail_capacity as u64 * self.particle_layout.min_binding_size().get(); + BindingResource::Buffer(BufferBinding { + buffer: trail_buffer, + offset: 0, + size: Some(NonZeroU64::new(capacity_bytes).unwrap()), + }) + }) + } + /// Return a binding for the entire indirect buffer associated with the /// current effect buffer. pub fn indirect_max_binding(&self) -> BindingResource { @@ -587,6 +700,14 @@ pub(crate) struct EffectCache { effects: HashMap, } +/// Information about an [`EffectBuffer`] that was just deleted. +pub(crate) struct DroppedEffectBuffer { + /// The index of the buffer. + pub(crate) index: u32, + /// Indices within the trail chunk buffer that should now be freed. 
+ pub(crate) trail_chunks: TrailChunks, +} + impl EffectCache { pub fn new(device: RenderDevice) -> Self { Self { @@ -610,8 +731,10 @@ impl EffectCache { &mut self, asset: Handle, capacity: u32, + trail_capacity: u32, particle_layout: &ParticleLayout, property_layout: &PropertyLayout, + trail_chunks: TrailChunks, layout_flags: LayoutFlags, // pipeline: ComputePipeline, _queue: &RenderQueue, @@ -644,10 +767,11 @@ impl EffectCache { capacity, particle_layout, particle_layout.min_binding_size().get() )); trace!( - "Creating new effect buffer #{} for effect {:?} (capacity={}, particle_layout={:?} item_size={}, byte_size={})", + "Creating new effect buffer #{} for effect {:?} (capacity={}, trail_capacity={}, particle_layout={:?} item_size={}, byte_size={})", buffer_index, asset, capacity, + trail_capacity, particle_layout, particle_layout.min_binding_size().get(), byte_size @@ -655,8 +779,10 @@ impl EffectCache { let mut buffer = EffectBuffer::new( asset, capacity, + trail_capacity, particle_layout.clone(), property_layout.clone(), + trail_chunks, layout_flags, //pipeline, &self.device, @@ -708,6 +834,38 @@ impl EffectCache { } } + /// An internal method that returns the indices of the head and tail chunks + /// in the trail chunk buffer for this frame. + /// + /// The head chunk is the index of the most recent trail chunk, while the + /// tail chunk is the index of the oldest trail chunk that is still alive. + /// + /// See the diagram in [`TrailChunks`] for more details. + pub fn get_trail_head_and_tail_chunks( + &self, + id: EffectCacheId, + tick: u32, + ) -> Option<(u32, u32)> { + self.effects + .get(&id) + .and_then(|(buffer_index, _)| self.buffers[*buffer_index].as_ref()) + .map(|buffer| { + let head_index = tick as usize; + + // Wrap around if necessary, since this is a ring buffer. 
+ let tail_index = if head_index == buffer.trail_chunks.chunk_ids.len() - 1 { + 0 + } else { + head_index + 1 + }; + + ( + buffer.trail_chunks.chunk_ids[head_index].0, + buffer.trail_chunks.chunk_ids[tail_index].0, + ) + }) + } + /// Get the zero-based index of the buffer. Used internally. pub(crate) fn buffer_index(&self, id: EffectCacheId) -> Option { self.effects.get(&id).map(|(buffer_index, _)| *buffer_index) @@ -715,12 +873,15 @@ impl EffectCache { /// Remove an effect from the cache. If this was the last effect, drop the /// underlying buffer and return the index of the dropped buffer. - pub fn remove(&mut self, id: EffectCacheId) -> Option { + pub fn remove(&mut self, id: EffectCacheId) -> Option { if let Some((buffer_index, slice)) = self.effects.remove(&id) { if let Some(buffer) = &mut self.buffers[buffer_index] { if buffer.free_slice(slice) == BufferState::Free { - self.buffers[buffer_index] = None; - return Some(buffer_index as u32); + let freed_buffer = self.buffers[buffer_index].take().unwrap(); + return Some(DroppedEffectBuffer { + index: buffer_index as u32, + trail_chunks: freed_buffer.trail_chunks, + }); } } } @@ -836,11 +997,14 @@ mod gpu_tests { let asset = Handle::::default(); let capacity = 4096; + let trail_capacity = 0; let mut buffer = EffectBuffer::new( asset, capacity, + trail_capacity, l64.clone(), PropertyLayout::empty(), // not using properties + TrailChunks::default(), // not using trails LayoutFlags::NONE, &render_device, Some("my_buffer"), @@ -912,11 +1076,14 @@ mod gpu_tests { let asset = Handle::::default(); let capacity = 2048; // EffectBuffer::MIN_CAPACITY; assert!(capacity >= 2048); // otherwise the logic below breaks + let trail_capacity = 0; let mut buffer = EffectBuffer::new( asset, capacity, + trail_capacity, l64.clone(), PropertyLayout::empty(), // not using properties + TrailChunks::default(), // not using trails LayoutFlags::NONE, &render_device, Some("my_buffer"), @@ -983,12 +1150,15 @@ mod gpu_tests { let asset = 
Handle::::default(); let capacity = EffectBuffer::MIN_CAPACITY; let item_size = l32.size(); + let trail_capacity = 0; let id1 = effect_cache.insert( asset.clone(), capacity, + trail_capacity, &l32, &empty_property_layout, + TrailChunks::default(), LayoutFlags::NONE, &render_queue, ); @@ -1004,8 +1174,10 @@ mod gpu_tests { let id2 = effect_cache.insert( asset.clone(), capacity, + trail_capacity, &l32, &empty_property_layout, + TrailChunks::default(), LayoutFlags::NONE, &render_queue, ); @@ -1018,9 +1190,9 @@ mod gpu_tests { assert_eq!(slice2.slice, 0..capacity); assert_eq!(effect_cache.buffers().len(), 2); - let buffer_index = effect_cache.remove(id1); - assert!(buffer_index.is_some()); - assert_eq!(buffer_index.unwrap(), 0); + let dropped_buffer = effect_cache.remove(id1); + assert!(dropped_buffer.is_some()); + assert_eq!(dropped_buffer.unwrap().index, 0); assert_eq!(effect_cache.buffers().len(), 2); { let buffers = effect_cache.buffers(); @@ -1032,8 +1204,10 @@ mod gpu_tests { let id3 = effect_cache.insert( asset, capacity, + trail_capacity, &l32, &empty_property_layout, + TrailChunks::default(), LayoutFlags::NONE, &render_queue, ); diff --git a/src/render/mod.rs b/src/render/mod.rs index 890af81b..6fc2f18e 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,14 +1,11 @@ #[cfg(feature = "2d")] use bevy::utils::FloatOrd; use bevy::{ - asset::{AssetEvent, AssetId, Assets, Handle}, - core::{Pod, Zeroable}, ecs::{ prelude::*, system::{lifetimeless::*, SystemParam, SystemState}, }, log::trace, - math::{Mat4, Vec3}, prelude::*, render::{ render_asset::RenderAssets, @@ -16,23 +13,22 @@ use bevy::{ render_phase::{Draw, DrawFunctions, PhaseItem, RenderPhase, TrackedRenderPass}, render_resource::*, renderer::{RenderContext, RenderDevice, RenderQueue}, - texture::{BevyDefault, Image}, + texture::BevyDefault, view::{ - ExtractedView, InheritedVisibility, Msaa, ViewTarget, ViewUniform, ViewUniformOffset, - ViewUniforms, ViewVisibility, VisibleEntities, + ExtractedView, 
ViewTarget, ViewUniform, ViewUniformOffset, ViewUniforms, + VisibleEntities, }, Extract, }, - time::Time, - transform::components::GlobalTransform, utils::HashMap, }; use bitflags::bitflags; +use bytemuck::{Pod, Zeroable}; use naga_oil::compose::{Composer, NagaModuleDescriptor}; use rand::random; -use std::marker::PhantomData; use std::{ borrow::Cow, + marker::PhantomData, num::{NonZeroU32, NonZeroU64}, }; @@ -44,7 +40,10 @@ use bevy::core_pipeline::core_3d::{AlphaMask3d, Transparent3d}; use crate::{ asset::EffectAsset, next_multiple_of, - render::batch::{BatchInput, BatchState, Batcher, EffectBatch}, + render::{ + batch::{BatchInput, BatchState, Batcher, EffectBatch}, + effect_cache::TrailChunks, + }, spawn::EffectSpawner, CompiledParticleEffect, EffectProperties, EffectShader, HanabiPlugin, ParticleLayout, PropertyLayout, RemovedEffectsEvent, SimulationCondition, @@ -148,6 +147,13 @@ struct GpuSimParams { /// /// This is only used by the `vfx_indirect` compute shader. dispatch_stride: u32, + /// Stride in bytes of the render indirect block that the trail rendering + /// pass uses, used to index the effect's block based on its index. + /// + /// This is only used by the `vfx_indirect` compute shader. + trail_render_stride: u32, + __pad1: u32, + __pad2: u32, } impl Default for GpuSimParams { @@ -156,8 +162,11 @@ impl Default for GpuSimParams { delta_time: 0.04, time: 0.0, num_effects: 0, - render_stride: 0, // invalid - dispatch_stride: 0, // invalid + render_stride: 0, // invalid + dispatch_stride: 0, // invalid + trail_render_stride: 0, // invalid + __pad1: 0, + __pad2: 0, } } } @@ -236,6 +245,16 @@ pub(crate) struct GpuSpawnerParams { count: i32, /// Index of the effect into the indirect dispatch and render buffers. effect_index: u32, + /// Whether we should create a trail particle this frame. + spawn_trail_particle: u32, + /// Capacity of the trail buffer. 
+ trail_capacity: u32, + /// Index within the chunk buffer of the index of the start of the most + /// recent chunk in the list. + trail_head_chunk: u32, + /// Index within the chunk buffer of the index of the start of the last + /// still-visible chunk in the list. + trail_tail_chunk: u32, } impl GpuSpawnerParams { @@ -301,156 +320,57 @@ pub struct GpuRenderIndirect { // pub ping: u32, pub max_update: u32, + pub trail_index: u32, pub __pad1: u32, - pub __pad2: u32, // FIXME - min_storage_buffer_offset_alignment } +/// The indirect buffer for trail rendering. +/// +/// This specifies the set of trail particles within the trail particle buffer +/// that are to be rendered this frame. +/// +/// Note that, because the trail particle buffer is a ring buffer, it's +/// entirely possible for the bounds of `(base_index, base_index + +/// instance_count)` to be beyond the boundaries of that buffer. This is +/// expected behavior, and the shader will perform the modulo operation +/// correctly to look the particle up in the buffer. +#[repr(C)] +#[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)] +pub struct GpuTrailRenderIndirect { + /// This is presently always 6, since we render quads. + pub vertex_count: u32, + pub instance_count: u32, + /// This is presently always 0, since we render quads. + pub base_index: u32, + pub base_instance: u32, +} + /// Compute pipeline to run the `vfx_indirect` dispatch workgroup calculation /// shader. +/// +/// Two versions of the pipeline are present: one for effects in which trails +/// are enabled, and one for effects that have no trails. This avoids the need +/// to allocate a trail buffer unnecessarily. 
#[derive(Resource)] pub(crate) struct DispatchIndirectPipeline { - dispatch_indirect_layout: BindGroupLayout, - pipeline: ComputePipeline, + dispatch_indirect_layout_no_trails: BindGroupLayout, + dispatch_indirect_layout_trails: BindGroupLayout, + pipeline_no_trails: ComputePipeline, + pipeline_trails: ComputePipeline, } impl FromWorld for DispatchIndirectPipeline { fn from_world(world: &mut World) -> Self { - let world = world.cell(); - let render_device = world.get_resource::().unwrap(); - - // The GpuSpawnerParams is bound as an array element or as a standalone struct, - // so needs the proper align. Because WGSL removed the @stride attribute, we pad - // the WGSL type manually, so need to enforce min_binding_size everywhere. - let item_align = render_device.limits().min_storage_buffer_offset_alignment as usize; - let spawner_aligned_size = GpuSpawnerParams::aligned_size(item_align); - trace!( - "Aligning spawner params to {} bytes as device limits requires. Size: {} bytes.", - item_align, - spawner_aligned_size - ); - - trace!( - "GpuRenderIndirect: min_size={} | GpuDispatchIndirect: min_size={}", - GpuRenderIndirect::min_size(), - GpuDispatchIndirect::min_size() - ); - let dispatch_indirect_layout = render_device.create_bind_group_layout( - "hanabi:bind_group_layout:dispatch_indirect_dispatch_indirect", - &[ - BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: false, - min_binding_size: Some(GpuRenderIndirect::min_size()), - }, - count: None, - }, - BindGroupLayoutEntry { - binding: 1, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: false, - min_binding_size: Some(GpuDispatchIndirect::min_size()), - }, - count: None, - }, - BindGroupLayoutEntry { - binding: 2, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: 
BufferBindingType::Storage { read_only: true }, - has_dynamic_offset: false, - min_binding_size: Some( - NonZeroU64::new(spawner_aligned_size as u64).unwrap(), - ), - }, - count: None, - }, - ], - ); - - trace!("GpuSimParams: min_size={}", GpuSimParams::min_size()); - let sim_params_layout = render_device.create_bind_group_layout( - "hanabi:bind_group_layout:dispatch_indirect_sim_params", - &[BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, - has_dynamic_offset: false, - min_binding_size: Some(GpuSimParams::min_size()), - }, - count: None, - }], - ); - - let pipeline_layout = render_device.create_pipeline_layout(&PipelineLayoutDescriptor { - label: Some("hanabi:pipeline_layout:dispatch_indirect"), - bind_group_layouts: &[&dispatch_indirect_layout, &sim_params_layout], - push_constant_ranges: &[], - }); - - // We need to pad the Spawner WGSL struct based on the device padding so that we - // can use it as an array element but also has a direct struct binding. - let spawner_padding_code = GpuSpawnerParams::padding_code(item_align); - let indirect_code = - include_str!("vfx_indirect.wgsl").replace("{{SPAWNER_PADDING}}", &spawner_padding_code); - - // Resolve imports. Because we don't insert this shader into Bevy' pipeline - // cache, we don't get that part "for free", so we have to do it manually here. 
- let indirect_naga_module = { - let mut composer = Composer::default(); - - // Import bevy_hanabi::vfx_common - { - let common_shader = HanabiPlugin::make_common_shader(item_align); - let mut desc: naga_oil::compose::ComposableModuleDescriptor<'_> = - (&common_shader).into(); - desc.shader_defs.insert( - "SPAWNER_PADDING".to_string(), - naga_oil::compose::ShaderDefValue::Bool(true), - ); - let res = composer.add_composable_module(desc); - assert!(res.is_ok()); - } - - let shader_defs = default(); - - match composer.make_naga_module(NagaModuleDescriptor { - source: &indirect_code, - file_path: "vfx_indirect.wgsl", - shader_defs, - ..Default::default() - }) { - Ok(naga_module) => ShaderSource::Naga(Cow::Owned(naga_module)), - Err(compose_error) => panic!( - "Failed to compose vfx_indirect.wgsl, naga_oil returned: {}", - compose_error.emit_to_string(&composer) - ), - } - }; - - debug!("Create indirect dispatch shader:\n{}", indirect_code); - - let shader_module = render_device.create_shader_module(ShaderModuleDescriptor { - label: Some("hanabi:vfx_indirect_shader"), - source: indirect_naga_module, - }); - - let pipeline = render_device.create_compute_pipeline(&RawComputePipelineDescriptor { - label: Some("hanabi:compute_pipeline:dispatch_indirect"), - layout: Some(&pipeline_layout), - module: &shader_module, - entry_point: "main", - }); - - Self { - dispatch_indirect_layout, - pipeline, + let (dispatch_indirect_layout_no_trails, pipeline_no_trails) = + create_dispatch_indirect_pipeline(world, false); + let (dispatch_indirect_layout_trails, pipeline_trails) = + create_dispatch_indirect_pipeline(world, true); + DispatchIndirectPipeline { + dispatch_indirect_layout_no_trails, + dispatch_indirect_layout_trails, + pipeline_no_trails, + pipeline_trails, } } } @@ -545,6 +465,10 @@ pub(crate) struct ParticleInitPipelineKey { /// Minimum binding size in bytes for the property layout buffer, if the /// effect has any property. Otherwise this is `None`. 
property_layout_min_binding_size: Option, + /// Whether the particle trail buffer has been allocated. + trail_buffer_present: bool, + /// Whether trails are active (enabled with a nonzero length). + trails_active: bool, } impl SpecializedComputePipeline for ParticlesInitPipeline { @@ -559,42 +483,11 @@ impl SpecializedComputePipeline for ParticlesInitPipeline { .unwrap_or(0), ); - let mut entries = Vec::with_capacity(3); - // (1,0) ParticleBuffer - entries.push(BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: true, - min_binding_size: Some(key.particle_layout_min_binding_size), - }, - count: None, - }); - // (1,1) IndirectBuffer - entries.push(BindGroupLayoutEntry { - binding: 1, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: true, - min_binding_size: BufferSize::new(12), - }, - count: None, - }); - if let Some(min_binding_size) = key.property_layout_min_binding_size { - // (1,2) Properties - entries.push(BindGroupLayoutEntry { - binding: 2, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: true }, - has_dynamic_offset: false, // TODO - min_binding_size: Some(min_binding_size), - }, - count: None, - }); - } + let entries = particle_bind_group_layout_entries( + key.particle_layout_min_binding_size, + key.property_layout_min_binding_size, + key.trail_buffer_present, + ); let label = "hanabi:init_particles_buffer_layout"; trace!( @@ -625,7 +518,6 @@ pub(crate) struct ParticlesUpdatePipeline { render_device: RenderDevice, sim_params_layout: BindGroupLayout, spawner_buffer_layout: BindGroupLayout, - render_indirect_layout: BindGroupLayout, } impl FromWorld for ParticlesUpdatePipeline { @@ -677,25 +569,11 @@ impl FromWorld for ParticlesUpdatePipeline { "GpuRenderIndirect: min_size={}", 
GpuRenderIndirect::min_size() ); - let render_indirect_layout = render_device.create_bind_group_layout( - "hanabi:update_render_indirect_layout", - &[BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: true, - min_binding_size: Some(GpuRenderIndirect::min_size()), - }, - count: None, - }], - ); Self { render_device: render_device.clone(), sim_params_layout, spawner_buffer_layout, - render_indirect_layout, } } } @@ -708,6 +586,10 @@ pub(crate) struct ParticleUpdatePipelineKey { particle_layout: ParticleLayout, /// Property layout. property_layout: PropertyLayout, + /// Whether the particle trail buffer has been allocated. + trail_buffer_present: bool, + /// Whether trails are active (enabled with a nonzero length). + trails_active: bool, } impl SpecializedComputePipeline for ParticlesUpdatePipeline { @@ -724,48 +606,58 @@ impl SpecializedComputePipeline for ParticlesUpdatePipeline { }, ); - let mut entries = vec![ - BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: false }, - has_dynamic_offset: true, - min_binding_size: Some(key.particle_layout.min_binding_size()), - }, - count: None, + let property_layout_min_binding_size = if key.property_layout.is_empty() { + None + } else { + Some(key.property_layout.min_binding_size()) + }; + let entries = particle_bind_group_layout_entries( + key.particle_layout.min_binding_size(), + property_layout_min_binding_size, + key.trail_buffer_present, + ); + + let label = "hanabi:update_particles_buffer_layout"; + trace!( + "Creating particle bind group layout '{}' for update pass with {} entries.", + label, + entries.len() + ); + let particles_buffer_layout = self.render_device.create_bind_group_layout(label, &entries); + + let mut entries = vec![BindGroupLayoutEntry { + binding: 0, + visibility: 
ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: Some(GpuRenderIndirect::min_size()), }, - BindGroupLayoutEntry { + count: None, + }]; + if key.trail_buffer_present { + entries.push(BindGroupLayoutEntry { binding: 1, visibility: ShaderStages::COMPUTE, ty: BindingType::Buffer { ty: BufferBindingType::Storage { read_only: false }, has_dynamic_offset: true, - min_binding_size: BufferSize::new(12), - }, - count: None, - }, - ]; - if !key.property_layout.is_empty() { - entries.push(BindGroupLayoutEntry { - binding: 2, - visibility: ShaderStages::COMPUTE, - ty: BindingType::Buffer { - ty: BufferBindingType::Storage { read_only: true }, - has_dynamic_offset: false, // TODO - min_binding_size: Some(key.property_layout.min_binding_size()), + min_binding_size: Some(GpuTrailRenderIndirect::min_size()), }, count: None, }); } + let render_indirect_layout = self + .render_device + .create_bind_group_layout("hanabi:update_render_indirect_layout", &entries); - let label = "hanabi:update_particles_buffer_layout"; - trace!( - "Creating particle bind group layout '{}' for update pass with {} entries.", - label, - entries.len() - ); - let particles_buffer_layout = self.render_device.create_bind_group_layout(label, &entries); + let mut shader_defs = vec!["RI_MAX_SPAWN_ATOMIC".into()]; + if key.trail_buffer_present { + shader_defs.push("TRI_TRAIL_INDEX_ATOMIC".into()); + } + if key.trails_active { + shader_defs.push("TRAILS".into()); + } ComputePipelineDescriptor { label: Some("hanabi:pipeline_update_compute".into()), @@ -773,10 +665,10 @@ impl SpecializedComputePipeline for ParticlesUpdatePipeline { self.sim_params_layout.clone(), particles_buffer_layout, self.spawner_buffer_layout.clone(), - self.render_indirect_layout.clone(), + render_indirect_layout, ], shader: key.shader, - shader_defs: vec!["RI_MAX_SPAWN_ATOMIC".into()], + shader_defs, entry_point: "main".into(), 
push_constant_ranges: Vec::new(), } @@ -880,6 +772,18 @@ pub(crate) struct ParticleRenderPipelineKey { /// The effect is rendered with flipbook texture animation based on the /// sprite index of each particle. flipbook: bool, + /// Key: TRAILS_BUFFER_PRESENT + /// A particle trails buffer is allocated. + trails_buffer_present: bool, + /// Particle trails are active (present with a nonzero length). + /// + /// This is separate from `trails_buffer_present` because it's perfectly + /// legal to have a nonzero trail particle capacity with zero trail length. + /// For example, the game might want to temporarily disable trail rendering + /// by setting the trail length to 0. In this case, we need to keep the + /// trail buffers around and attach them to the bind groups, but we should + /// omit the actual shader code that manages the buffers. + trails_active: bool, /// For dual-mode configurations only, the actual mode of the current render /// pipeline. Otherwise the mode is implicitly determined by the active /// feature. @@ -900,6 +804,8 @@ impl Default for ParticleRenderPipelineKey { local_space_simulation: false, use_alpha_mask: false, flipbook: false, + trails_buffer_present: false, + trails_active: false, #[cfg(all(feature = "2d", feature = "3d"))] pipeline_mode: PipelineMode::Camera3d, msaa_samples: Msaa::default().samples(), @@ -994,7 +900,7 @@ impl SpecializedRenderPipeline for ParticlesRenderPipeline { min_binding_size: Some(GpuSpawnerParams::min_size()), }, count: None, - }); + }) } trace!( @@ -1041,6 +947,10 @@ impl SpecializedRenderPipeline for ParticlesRenderPipeline { shader_defs.push("FLIPBOOK".into()); } + if key.trails_active { + shader_defs.push("TRAILS".into()); + } + #[cfg(all(feature = "2d", feature = "3d"))] let depth_stencil = match key.pipeline_mode { // Bevy's Transparent2d render phase doesn't support a depth-stencil buffer. 
@@ -1142,6 +1052,17 @@ pub(crate) struct ExtractedEffect { /// /// [`EffectSpawner::tick()`]: crate::EffectSpawner::tick pub spawn_count: u32, + /// Whether a trail particle is due to be spawned this frame. + /// Obtained from calling [`EffectSpawner::tick()`] on the source effect + /// instance. + pub spawn_trail_particle: bool, + /// The trail length. + pub trail_length: u32, + /// The total number of trail particles that can exist at one time. + pub trail_capacity: u32, + /// The total number of trail chunks that have been spawned since this + /// effect has existed. + pub trail_tick: u32, /// Global transform of the effect origin, extracted from the /// [`GlobalTransform`]. pub transform: Mat4, @@ -1172,6 +1093,8 @@ pub struct AddedEffect { /// Capacity of the effect (and therefore, the particle buffer), in number /// of particles. pub capacity: u32, + pub trail_capacity: u32, + pub trail_length: u32, /// Layout of particle attributes. pub particle_layout: ParticleLayout, /// Layout of properties for the effect, if properties are used at all, or @@ -1298,6 +1221,8 @@ pub(crate) fn extract_effects( AddedEffect { entity, capacity: asset.capacity(), + trail_capacity: asset.trail_capacity(), + trail_length: asset.spawner.trail_length(), particle_layout, property_layout, layout_flags: effect.layout_flags, @@ -1335,6 +1260,9 @@ pub(crate) fn extract_effects( // Retrieve other values from the compiled effect let spawn_count = spawner.spawn_count(); + let spawn_trail_particle = spawner.spawn_trail_particle(); + let trail_length = spawner.spawner().trail_length(); + let trail_tick = spawner.trail_tick(); // Check if asset is available, otherwise silently ignore let Some(asset) = effects.get(&effect.asset) else { @@ -1345,6 +1273,8 @@ pub(crate) fn extract_effects( continue; }; + let trail_capacity = asset.trail_capacity(); + #[cfg(feature = "2d")] let z_sort_key_2d = effect.z_layer_2d; @@ -1389,6 +1319,10 @@ pub(crate) fn extract_effects( property_layout, property_data, 
spawn_count, + spawn_trail_particle, + trail_length, + trail_capacity, + trail_tick, transform: transform.compute_matrix(), // TODO - more efficient/correct way than inverse()? inverse_transform: transform.compute_matrix().inverse(), @@ -1429,6 +1363,7 @@ struct GpuLimits { /// /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment render_indirect_aligned_size: NonZeroU32, + trail_render_indirect_aligned_size: NonZeroU32, } impl GpuLimits { @@ -1447,17 +1382,25 @@ impl GpuLimits { ) as u32) .unwrap(); + let trail_render_indirect_aligned_size = NonZeroU32::new(next_multiple_of( + GpuTrailRenderIndirect::min_size().get() as usize, + storage_buffer_align as usize, + ) as u32) + .unwrap(); + trace!( - "GpuLimits: storage_buffer_align={} gpu_dispatch_indirect_aligned_size={} gpu_render_indirect_aligned_size={}", + "GpuLimits: storage_buffer_align={} gpu_dispatch_indirect_aligned_size={} gpu_render_indirect_aligned_size={} gpu_trail_render_indirect_aligned_size={}", storage_buffer_align, dispatch_indirect_aligned_size.get(), - render_indirect_aligned_size.get() + render_indirect_aligned_size.get(), + trail_render_indirect_aligned_size.get(), ); Self { storage_buffer_align: NonZeroU32::new(storage_buffer_align).unwrap(), dispatch_indirect_aligned_size, render_indirect_aligned_size, + trail_render_indirect_aligned_size, } } @@ -1472,6 +1415,10 @@ impl GpuLimits { pub fn render_indirect_offset(&self, buffer_index: u32) -> u64 { self.render_indirect_aligned_size.get() as u64 * buffer_index as u64 } + + pub fn trail_render_indirect_offset(&self, buffer_index: u32) -> u64 { + self.trail_render_indirect_aligned_size.get() as u64 * buffer_index as u64 + } } /// Global resource containing the GPU data to draw all the particle effects in @@ -1505,14 +1452,18 @@ pub struct EffectsMeta { /// Bind group #3 of the vfx_init shader, containing the indirect render /// buffer. 
init_render_indirect_bind_group: Option, - /// Bind group #3 of the vfx_update shader, containing the indirect render - /// buffer. - update_render_indirect_bind_group: Option, sim_params_uniforms: UniformBuffer, spawner_buffer: AlignedBufferVec, dispatch_indirect_buffer: BufferTable, render_dispatch_buffer: BufferTable, + /// The indirect drawing buffer for particle trails. + trail_render_dispatch_buffer: BufferTable, + /// Holds indices of chunks that refer to the portions of the particle trail + /// buffer. + /// + /// See the documentation for [`TrailChunks`] for more information. + trail_chunk_buffer: BufferTable, /// Unscaled vertices of the mesh of a single particle, generally a quad. /// The mesh is later scaled during rendering by the "particle size". // FIXME - This is a per-effect thing, unless we merge all meshes into a single buffer (makes @@ -1523,6 +1474,8 @@ pub struct EffectsMeta { /// Various GPU limits and aligned sizes lazily allocated and cached for /// convenience. gpu_limits: GpuLimits, + /// Whether any particle trail buffer has been allocated. 
+ trail_buffer_present: bool, } impl EffectsMeta { @@ -1555,7 +1508,6 @@ impl EffectsMeta { spawner_bind_group: None, dr_indirect_bind_group: None, init_render_indirect_bind_group: None, - update_render_indirect_bind_group: None, sim_params_uniforms: UniformBuffer::default(), spawner_buffer: AlignedBufferVec::new( BufferUsages::STORAGE, @@ -1576,9 +1528,20 @@ impl EffectsMeta { NonZeroU64::new(item_align), Some("hanabi:buffer:render_dispatch".to_string()), ), + trail_render_dispatch_buffer: BufferTable::new( + BufferUsages::STORAGE | BufferUsages::INDIRECT, + NonZeroU64::new(item_align), + Some("hanabi:buffer:trail_render_dispatch".to_string()), + ), + trail_chunk_buffer: BufferTable::new( + BufferUsages::STORAGE, + NonZeroU64::new(item_align), + Some("hanabi:buffer:trail_chunk".to_string()), + ), vertices, indirect_dispatch_pipeline: None, gpu_limits, + trail_buffer_present: false, } } @@ -1606,19 +1569,27 @@ impl EffectsMeta { entity, id ); - if let Some(buffer_index) = self.effect_cache.remove(id) { + if let Some(dropped_buffer) = self.effect_cache.remove(id) { // Clear bind groups associated with the removed buffer trace!( "=> GPU buffer #{} gone, destroying its bind groups...", - buffer_index + dropped_buffer.index ); - effect_bind_groups.particle_buffers.remove(&buffer_index); + effect_bind_groups + .particle_buffers + .remove(&dropped_buffer.index); // NOTE: by convention (see assert below) the cache ID is also the table ID, as // those 3 data structures stay in sync. - let table_id = BufferTableId(buffer_index); + let table_id = BufferTableId(dropped_buffer.index); self.dispatch_indirect_buffer.remove(table_id); self.render_dispatch_buffer.remove(table_id); + self.trail_render_dispatch_buffer.remove(table_id); + + // Delete all the buffer's trail chunk slots. 
+ for trail_chunk_id in dropped_buffer.trail_chunks.chunk_ids { + self.trail_chunk_buffer.remove(trail_chunk_id); + } } } } @@ -1631,11 +1602,20 @@ impl EffectsMeta { trace!("Adding {} newly spawned effects", added_effects.len()); for added_effect in added_effects.drain(..) { + // Build a circular list of chunks. + let trail_chunks = TrailChunks { + chunk_ids: (0..=added_effect.trail_length) + .map(|_| self.trail_chunk_buffer.insert(0)) + .collect::>(), + }; + let cache_id = self.effect_cache.insert( added_effect.handle, added_effect.capacity, + added_effect.trail_capacity, &added_effect.particle_layout, &added_effect.property_layout, + trail_chunks, added_effect.layout_flags, // update_pipeline.pipeline.clone(), render_queue, @@ -1675,6 +1655,20 @@ impl EffectsMeta { "Broken table invariant: buffer={} row={}", index, table_id.0 ); + + let table_id = self + .trail_render_dispatch_buffer + .insert(GpuTrailRenderIndirect { + vertex_count: 6, // TODO - Flexible vertex count and mesh particles + ..default() + }); + // FIXME - Should have a single index and table bookeeping data structure, used + // by multiple buffers + assert_eq!( + table_id.0 as usize, index, + "Broken table invariant: buffer={} row={}", + index, table_id.0 + ); } // Once all changes are applied, immediately schedule any GPU buffer @@ -1687,13 +1681,16 @@ impl EffectsMeta { // All those bind groups use the indirect buffer so need to be re-created. effect_bind_groups.particle_buffers.clear(); } - if self - .render_dispatch_buffer - .allocate_gpu(render_device, render_queue) - { - // Currently we always re-create each frame any bind group that - // binds this buffer, so there's nothing to do here. - } + + // We always currently recreate any bind group that binds any of these + // buffers every frame, so we don't need to do anything special if the + // buffer was allocated. 
+ self.render_dispatch_buffer + .allocate_gpu(render_device, render_queue); + self.trail_render_dispatch_buffer + .allocate_gpu(render_device, render_queue); + self.trail_chunk_buffer + .allocate_gpu(render_device, render_queue); } } @@ -1720,6 +1717,9 @@ bitflags! { const USE_ALPHA_MASK = (1 << 3); /// The effect is rendered with flipbook texture animation based on the [`Attribute::SPRITE_INDEX`] of each particle. const FLIPBOOK = (1 << 4); + /// A buffer for particle trails has been allocated (whether or not + /// trails are currently enabled). + const TRAILS_BUFFER_PRESENT = (1 << 5); } } @@ -1758,8 +1758,6 @@ pub(crate) fn prepare_effects( .vertices .write_buffer(&render_device, &render_queue); - effects_meta.indirect_dispatch_pipeline = Some(dispatch_indirect_pipeline.pipeline.clone()); - // Clear last frame's buffer resizes which may have occured during last frame, // during `Node::run()` while the `BufferTable` could not be mutated. effects_meta @@ -1793,12 +1791,21 @@ pub(crate) fn prepare_effects( // Build batcher inputs from extracted effects let effects = std::mem::take(&mut extracted_effects.effects); + effects_meta.trail_buffer_present = false; let mut effect_entity_list = effects .into_iter() .map(|(entity, extracted_effect)| { let id = *effects_meta.entity_map.get(&entity).unwrap(); let property_buffer = effects_meta.effect_cache.get_property_buffer(id).cloned(); // clone handle for lifetime let effect_slice = effects_meta.effect_cache.get_slice(id); + let (trail_head_chunk, trail_tail_chunk) = effects_meta + .effect_cache + .get_trail_head_and_tail_chunks(id, extracted_effect.trail_tick) + .unwrap_or_default(); + + if extracted_effect.trail_capacity > 0 { + effects_meta.trail_buffer_present = true; + } BatchInput { handle: extracted_effect.handle, @@ -1809,6 +1816,11 @@ pub(crate) fn prepare_effects( layout_flags: extracted_effect.layout_flags, image_handle: extracted_effect.image_handle, spawn_count: extracted_effect.spawn_count, + 
spawn_trail_particle: extracted_effect.spawn_trail_particle, + trail_length: extracted_effect.trail_length, + trail_capacity: extracted_effect.trail_capacity, + trail_head_chunk, + trail_tail_chunk, transform: extracted_effect.transform.into(), inverse_transform: extracted_effect.inverse_transform.into(), property_buffer, @@ -1820,6 +1832,12 @@ pub(crate) fn prepare_effects( .collect::>(); trace!("Collected {} extracted effects", effect_entity_list.len()); + effects_meta.indirect_dispatch_pipeline = if effects_meta.trail_buffer_present { + Some(dispatch_indirect_pipeline.pipeline_trails.clone()) + } else { + Some(dispatch_indirect_pipeline.pipeline_no_trails.clone()) + }; + // Sort first by effect buffer index, then by slice range (see EffectSlice) // inside that buffer. This is critical for batching to work, because // batching effects is based on compatible items, which implies same GPU @@ -1857,6 +1875,8 @@ pub(crate) fn prepare_effects( } else { Some(input.property_layout.min_binding_size()) }, + trail_buffer_present: input.trail_capacity > 0, + trails_active: input.trail_capacity > 0 && input.trail_length > 0, }, ); trace!("Init pipeline specialized: id={:?}", init_pipeline_id); @@ -1875,6 +1895,8 @@ pub(crate) fn prepare_effects( shader: input.effect_shader.update.clone(), particle_layout: input.effect_slice.particle_layout.clone(), property_layout: input.property_layout.clone(), + trail_buffer_present: input.trail_capacity > 0, + trails_active: input.trail_capacity > 0 && input.trail_length > 0, }, ); trace!("Update pipeline specialized: id={:?}", update_pipeline_id); @@ -1924,6 +1946,10 @@ pub(crate) fn prepare_effects( // but the group_index is the index of the particle buffer, which can // in theory (with batching) contain > 1 effect per buffer. 
effect_index: input.effect_slice.group_index, + spawn_trail_particle: input.spawn_trail_particle as _, + trail_capacity: input.trail_capacity, + trail_head_chunk: input.trail_head_chunk, + trail_tail_chunk: input.trail_tail_chunk, }; trace!("spawner_params = {:?}", spawner_params); effects_meta.spawner_buffer.push(spawner_params); @@ -2000,6 +2026,10 @@ pub(crate) fn prepare_effects( GpuDispatchIndirect::min_size().get() as usize, storage_align, ) as u32; + gpu_sim_params.trail_render_stride = next_multiple_of( + GpuTrailRenderIndirect::min_size().get() as usize, + storage_align, + ) as u32; trace!( "Simulation parameters: time={} delta_time={} num_effects={} render_stride={} dispatch_stride={}", @@ -2033,6 +2063,15 @@ pub(crate) struct BufferBindGroups { /// @binding(2) var dispatch_indirect : DispatchIndirect; /// ``` render: BindGroup, + /// Bind group for the trail render graphic shader. + /// + /// ```wgsl + /// @binding(0) var particle_buffer : ParticleBuffer; + /// @binding(1) var indirect_buffer : IndirectBuffer; + /// @binding(2) var dispatch_indirect : DispatchIndirect; + /// ``` + trail_render: Option, + update_render_indirect: BindGroup, } #[derive(Default, Resource)] @@ -2055,6 +2094,18 @@ impl EffectBindGroups { .get(&buffer_index) .map(|bg| &bg.render) } + + pub fn trail_particle_render(&self, buffer_index: u32) -> Option<&BindGroup> { + self.particle_buffers + .get(&buffer_index) + .and_then(|bg| bg.trail_render.as_ref()) + } + + pub fn update_render_indirect(&self, buffer_index: u32) -> Option<&BindGroup> { + self.particle_buffers + .get(&buffer_index) + .map(|bg| &bg.update_render_indirect) + } } #[derive(SystemParam)] @@ -2188,6 +2239,10 @@ fn emit_draw( .contains(LayoutFlags::LOCAL_SPACE_SIMULATION); let use_alpha_mask = batch.layout_flags.contains(LayoutFlags::USE_ALPHA_MASK); let flipbook = batch.layout_flags.contains(LayoutFlags::FLIPBOOK); + let trails_buffer_present = batch + .layout_flags + .contains(LayoutFlags::TRAILS_BUFFER_PRESENT); + 
let trails_active = batch.trails_active; // Specialize the render pipeline based on the effect batch trace!( @@ -2208,6 +2263,8 @@ fn emit_draw( local_space_simulation, use_alpha_mask, flipbook, + trails_buffer_present, + trails_active, #[cfg(all(feature = "2d", feature = "3d"))] pipeline_mode, msaa_samples, @@ -2313,59 +2370,67 @@ pub(crate) fn queue_effects( )); // Create the bind group for the indirect dispatch of all effects - effects_meta.dr_indirect_bind_group = Some( - render_device.create_bind_group( - "hanabi:bind_group_vfx_indirect_dr_indirect", - &read_params - .dispatch_indirect_pipeline - .dispatch_indirect_layout, - &[ - BindGroupEntry { - binding: 0, - resource: BindingResource::Buffer(BufferBinding { - buffer: effects_meta.render_dispatch_buffer.buffer().unwrap(), - offset: 0, - size: None, //NonZeroU64::new(256), // Some(GpuRenderIndirect::min_size()), - }), - }, - BindGroupEntry { - binding: 1, - resource: BindingResource::Buffer(BufferBinding { - buffer: effects_meta.dispatch_indirect_buffer.buffer().unwrap(), - offset: 0, - size: None, //NonZeroU64::new(256), // Some(GpuDispatchIndirect::min_size()), - }), - }, - BindGroupEntry { - binding: 2, - resource: BindingResource::Buffer(BufferBinding { - buffer: effects_meta.spawner_buffer.buffer().unwrap(), - offset: 0, - size: None, - }), - }, - ], - ), - ); - - // Create the bind group for the indirect render buffer use in the init shader - effects_meta.init_render_indirect_bind_group = Some(render_device.create_bind_group( - "hanabi:bind_group_init_render_dispatch", - &read_params.init_pipeline.render_indirect_layout, - &[BindGroupEntry { + let mut entries = vec![ + BindGroupEntry { binding: 0, resource: BindingResource::Buffer(BufferBinding { buffer: effects_meta.render_dispatch_buffer.buffer().unwrap(), offset: 0, - size: Some(GpuRenderIndirect::min_size()), + size: None, //NonZeroU64::new(256), // Some(GpuRenderIndirect::min_size()), }), - }], + }, + BindGroupEntry { + binding: 1, + resource: 
BindingResource::Buffer(BufferBinding { + buffer: effects_meta.dispatch_indirect_buffer.buffer().unwrap(), + offset: 0, + size: None, //NonZeroU64::new(256), // Some(GpuDispatchIndirect::min_size()), + }), + }, + BindGroupEntry { + binding: 2, + resource: BindingResource::Buffer(BufferBinding { + buffer: effects_meta.spawner_buffer.buffer().unwrap(), + offset: 0, + size: None, + }), + }, + ]; + let dispatch_indirect_layout = if effects_meta.trail_buffer_present { + entries.push(BindGroupEntry { + binding: 3, + resource: BindingResource::Buffer(BufferBinding { + buffer: effects_meta.trail_render_dispatch_buffer.buffer().unwrap(), + offset: 0, + size: None, + }), + }); + entries.push(BindGroupEntry { + binding: 4, + resource: BindingResource::Buffer(BufferBinding { + buffer: effects_meta.trail_chunk_buffer.buffer().unwrap(), + offset: 0, + size: None, + }), + }); + &read_params + .dispatch_indirect_pipeline + .dispatch_indirect_layout_trails + } else { + &read_params + .dispatch_indirect_pipeline + .dispatch_indirect_layout_no_trails + }; + effects_meta.dr_indirect_bind_group = Some(render_device.create_bind_group( + "hanabi:bind_group_vfx_indirect_dr_indirect", + dispatch_indirect_layout, + &entries, )); - // Create the bind group for the indirect render buffer use in the update shader - effects_meta.update_render_indirect_bind_group = Some(render_device.create_bind_group( - "hanabi:bind_group_update_render_dispatch", - &read_params.update_pipeline.render_indirect_layout, + // Create the bind group for the indirect render buffer use in the init shader + effects_meta.init_render_indirect_bind_group = Some(render_device.create_bind_group( + "hanabi:bind_group_init_render_dispatch", + &read_params.init_pipeline.render_indirect_layout, &[BindGroupEntry { binding: 0, resource: BindingResource::Buffer(BufferBinding { @@ -2414,37 +2479,24 @@ pub(crate) fn queue_effects( // Bind group shared by the init and update compute shaders to simulate particles. 
let layout = buffer.particle_layout_bind_group_simulate(); let label = format!("hanabi:bind_group_simulate_vfx{}_particles", buffer_index); - let simulate = if let Some(property_binding) = buffer.properties_max_binding() { - let entries = [ - BindGroupEntry { - binding: 0, - resource: buffer.max_binding(), - }, - BindGroupEntry { - binding: 1, - resource: buffer.indirect_max_binding(), - }, - BindGroupEntry { - binding: 2, - resource: property_binding, - }, - ]; - trace!("=> create update bind group '{}' with 3 entries", label); - render_device.create_bind_group(Some(&label[..]), layout, &entries) - } else { - let entries = [ - BindGroupEntry { - binding: 0, - resource: buffer.max_binding(), - }, - BindGroupEntry { - binding: 1, - resource: buffer.indirect_max_binding(), - }, - ]; - trace!("=> create update bind group '{}' with 2 entries", label); - render_device.create_bind_group(Some(&label[..]), layout, &entries) - }; + let mut entries = vec![ + BindGroupEntry { + binding: 0, + resource: buffer.max_binding(), + }, + BindGroupEntry { + binding: 1, + resource: buffer.indirect_max_binding(), + }, + ]; + if let Some(property_binding) = buffer.properties_max_binding() { + entries.push(BindGroupEntry { binding: 2, resource: property_binding }); + } + if let Some(trail_max_binding) = buffer.trail_max_binding() { + entries.push(BindGroupEntry { binding: 3, resource: trail_max_binding }); + } + trace!("=> create update bind group '{}' with {} entries", label, entries.len()); + let simulate = render_device.create_bind_group(Some(&label[..]), layout, &entries); // let mut entries = vec![ @@ -2475,16 +2527,86 @@ pub(crate) fn queue_effects( }), }); } - trace!("Creating render bind group with {} entries (layour flags: {:?})", entries.len(), buffer.layout_flags()); + trace!("Creating render bind group with {} entries (layout flags: {:?})", entries.len(), buffer.layout_flags()); let render = render_device.create_bind_group( 
&format!("hanabi:bind_group_render_vfx{buffer_index}_particles")[..], buffer.particle_layout_bind_group_with_dispatch(), &entries, ); + // Create the bind group for the trail particles for use in the + // render pipeline. + let trail_render = buffer.trail_max_binding().map(|trail_binding| { + let mut entries = vec![ + BindGroupEntry { + binding: 0, + resource: trail_binding, + }, + BindGroupEntry { + binding: 1, + resource: buffer.indirect_max_binding(), + }, + BindGroupEntry { + binding: 2, + resource: BindingResource::Buffer(BufferBinding { + buffer: &indirect_buffer, + offset: 0, + size: Some(GpuDispatchIndirect::min_size()), + }), + }, + ]; + if buffer.layout_flags().contains(LayoutFlags::LOCAL_SPACE_SIMULATION) { + entries.push( + BindGroupEntry { + binding: 3, + resource: BindingResource::Buffer(BufferBinding { + buffer: &spawner_buffer, + offset: 0, + size: Some(GpuSpawnerParams::min_size()), + }), + }, + ); + } + render_device.create_bind_group( + &format!("hanabi:bind_group_render_vfx{buffer_index}_trail_particles")[..], + buffer.particle_layout_bind_group_with_dispatch(), + &entries) + }); + + // Create the bind group for the indirect render buffer use in the update shader + let mut bind_group_entries = vec![ + BindGroupEntry { + binding: 0, + resource: BindingResource::Buffer(BufferBinding { + buffer: effects_meta.render_dispatch_buffer.buffer().unwrap(), + offset: 0, + size: Some(GpuRenderIndirect::min_size()), + }), + } + ]; + if buffer.trail_max_binding().is_some() { + bind_group_entries.push( + BindGroupEntry { + binding: 1, + resource: BindingResource::Buffer(BufferBinding { + buffer: effects_meta.trail_render_dispatch_buffer.buffer().unwrap(), + offset: 0, + size: Some(GpuTrailRenderIndirect::min_size()), + }) + } + ); + } + let update_render_indirect = render_device.create_bind_group( + "hanabi:bind_group_update_render_dispatch", + buffer.update_render_indirect_layout_bind_group(), + &bind_group_entries, + ); + BufferBindGroups { simulate, render, 
+ trail_render, + update_render_indirect, } }); } @@ -2665,6 +2787,36 @@ fn draw<'w>( entity: Entity, pipeline_id: CachedRenderPipelineId, params: &mut DrawEffectsSystemState, +) { + // Draw particle heads, then particle trails. + draw_one_pass( + world, + pass, + view, + entity, + pipeline_id, + params, + /*drawing_trails=*/ false, + ); + draw_one_pass( + world, + pass, + view, + entity, + pipeline_id, + params, + /*drawing_trails=*/ true, + ); +} + +fn draw_one_pass<'w>( + world: &'w World, + pass: &mut TrackedRenderPass<'w>, + view: Entity, + entity: Entity, + pipeline_id: CachedRenderPipelineId, + params: &mut DrawEffectsSystemState, + drawing_trails: bool, ) { let (effects_meta, effect_bind_groups, pipeline_cache, views, effects) = params.get(world); let view_uniform = views.get(view).unwrap(); @@ -2678,6 +2830,18 @@ fn draw<'w>( return; }; + let bind_group = if drawing_trails { + let Some(bind_group) = effect_bind_groups.trail_particle_render(effect_batch.buffer_index) + else { + return; + }; + bind_group + } else { + effect_bind_groups + .particle_render(effect_batch.buffer_index) + .unwrap() + }; + trace!("render pass"); pass.set_render_pipeline(pipeline); @@ -2711,13 +2875,7 @@ fn draw<'w>( } else { &dyn_uniform_indices[..1] }; - pass.set_bind_group( - 1, - effect_bind_groups - .particle_render(effect_batch.buffer_index) - .unwrap(), - dyn_uniform_indices, - ); + pass.set_bind_group(1, bind_group, dyn_uniform_indices); // Particle texture if effect_batch @@ -2740,16 +2898,24 @@ fn draw<'w>( } } - let render_indirect_buffer = effects_meta.render_dispatch_buffer.buffer().unwrap(); + let (render_indirect_buffer, render_indirect_offset); + if drawing_trails { + render_indirect_buffer = effects_meta.trail_render_dispatch_buffer.buffer().unwrap(); + render_indirect_offset = gpu_limits.trail_render_indirect_offset(effect_batch.buffer_index); + } else { + render_indirect_buffer = effects_meta.render_dispatch_buffer.buffer().unwrap(); + render_indirect_offset = 
gpu_limits.render_indirect_offset(effect_batch.buffer_index); + } - let render_indirect_offset = gpu_limits.render_indirect_offset(effect_batch.buffer_index); trace!( - "Draw {} particles with {} vertices per particle for batch from buffer #{} (render_indirect_offset={}).", + "Draw {} {}particles with {} vertices per particle for batch from buffer #{} (render_indirect_offset={}).", effect_batch.slice.len(), + if drawing_trails { "trail " } else { "" }, effects_meta.vertices.len(), effect_batch.buffer_index, render_indirect_offset ); + pass.draw_indirect(render_indirect_buffer, render_indirect_offset); } @@ -2925,58 +3091,19 @@ impl Node for VfxSimulateNode { pipeline_cache.get_compute_pipeline(batch.init_pipeline_id) { // Do not dispatch any init work if there's nothing to spawn this frame - let spawn_count = batch.spawn_count; - if spawn_count == 0 { + if batch.spawn_count == 0 { continue; } - const WORKGROUP_SIZE: u32 = 64; - let workgroup_count = (spawn_count + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE; - - // for (effect_entity, effect_slice) in effects_meta.entity_map.iter() { - // Retrieve the ExtractedEffect from the entity - // trace!("effect_entity={:?} effect_slice={:?}", effect_entity, - // effect_slice); let effect = - // self.effect_query.get_manual(world, *effect_entity).unwrap(); + let particle_buffer_info = + ParticleBufferInfo::new(batch, effects_meta, num_batches, "init"); - // Get the slice to init - // let effect_slice = effects_meta.get(&effect_entity); - // let effect_group = - // &effects_meta.effect_cache.buffers()[batch.buffer_index as usize]; let Some(particles_bind_group) = effect_bind_groups.particle_simulate(batch.buffer_index) else { continue; }; - let item_size = batch.particle_layout.min_binding_size(); - let item_count = batch.slice.end - batch.slice.start; - - let spawner_base = batch.spawner_base; - let buffer_offset = batch.slice.start; - - let spawner_buffer_aligned = effects_meta.spawner_buffer.aligned_size(); - assert!( - 
spawner_buffer_aligned >= GpuSpawnerParams::min_size().get() as usize - ); - let spawner_offset = spawner_base * spawner_buffer_aligned as u32; - - let render_indirect_offset = batch.buffer_index - * effects_meta.render_dispatch_buffer.aligned_size() as u32; - - trace!( - "record commands for init pipeline of effect {:?} ({} items / {}B/item) (spawn {} = {} workgroups) spawner_base={} spawner_offset={} buffer_offset={} render_indirect_offset={}...", - batch.handle, - item_count, - item_size, - spawn_count, - workgroup_count, - spawner_base, - spawner_offset, - buffer_offset, - render_indirect_offset, - ); - // Setup compute pass // compute_pass.set_pipeline(&effect_group.init_pipeline); compute_pass.set_pipeline(init_pipeline); @@ -2985,16 +3112,23 @@ impl Node for VfxSimulateNode { effects_meta.sim_params_bind_group.as_ref().unwrap(), &[], ); - compute_pass.set_bind_group( - 1, - particles_bind_group, - &[buffer_offset, buffer_offset], /* FIXME: probably in bytes, so - * probably wrong! 
*/ - ); + + let mut entries = vec![ + particle_buffer_info.buffer_offset, + particle_buffer_info.buffer_offset, + ]; + if batch + .layout_flags + .contains(LayoutFlags::TRAILS_BUFFER_PRESENT) + { + entries.push(particle_buffer_info.buffer_offset); + } + compute_pass.set_bind_group(1, particles_bind_group, &entries); + compute_pass.set_bind_group( 2, effects_meta.spawner_bind_group.as_ref().unwrap(), - &[spawner_offset], + &[particle_buffer_info.spawner_offset], ); compute_pass.set_bind_group( 3, @@ -3002,9 +3136,13 @@ impl Node for VfxSimulateNode { .init_render_indirect_bind_group .as_ref() .unwrap(), - &[render_indirect_offset], + &[particle_buffer_info.render_indirect_offset], + ); + compute_pass.dispatch_workgroups( + particle_buffer_info.workgroup_count, + 1, + 1, ); - compute_pass.dispatch_workgroups(workgroup_count, 1, 1); trace!("init compute dispatched"); } } @@ -3087,31 +3225,14 @@ impl Node for VfxSimulateNode { continue; }; - let item_size = batch.particle_layout.size(); - let item_count = batch.slice.end - batch.slice.start; - - let spawner_base = batch.spawner_base; - let buffer_offset = batch.slice.start; - - let spawner_buffer_aligned = effects_meta.spawner_buffer.aligned_size(); - assert!(spawner_buffer_aligned >= GpuSpawnerParams::min_size().get() as usize); - - let dispatch_indirect_offset = batch.buffer_index as u64 - * effects_meta.dispatch_indirect_buffer.aligned_size() as u64; - - let render_indirect_offset = batch.buffer_index - * effects_meta.render_dispatch_buffer.aligned_size() as u32; + let Some(update_render_indirect_bind_group) = + effect_bind_groups.update_render_indirect(batch.buffer_index) + else { + continue; + }; - trace!( - "record commands for update pipeline of effect {:?} ({} items / {}B/item) spawner_base={} buffer_offset={} dispatch_indirect_offset={} render_indirect_offset={}...", - batch.handle, - item_count, - item_size, - spawner_base, - buffer_offset, - dispatch_indirect_offset, - render_indirect_offset - ); + let 
particle_buffer_info = + ParticleBufferInfo::new(batch, effects_meta, num_batches, "update"); // Setup compute pass // compute_pass.set_pipeline(&effect_group.update_pipeline); @@ -3121,33 +3242,49 @@ impl Node for VfxSimulateNode { effects_meta.sim_params_bind_group.as_ref().unwrap(), &[], ); - compute_pass.set_bind_group( - 1, - particles_bind_group, - &[buffer_offset, buffer_offset], /* FIXME: probably in bytes, so - * probably wrong! */ - ); + + // FIXME: Probably in bytes, so probably wrong! + let mut buffer_indirect_offsets = vec![ + particle_buffer_info.buffer_offset, + particle_buffer_info.buffer_offset, + ]; + if batch + .layout_flags + .contains(LayoutFlags::TRAILS_BUFFER_PRESENT) + { + buffer_indirect_offsets.push(particle_buffer_info.buffer_offset); + } + compute_pass.set_bind_group(1, particles_bind_group, &buffer_indirect_offsets); + compute_pass.set_bind_group( 2, effects_meta.spawner_bind_group.as_ref().unwrap(), - &[spawner_base * spawner_buffer_aligned as u32], + &[particle_buffer_info.spawner_base + * particle_buffer_info.spawner_buffer_aligned as u32], ); + + let mut buffer_indirect_offsets = + vec![particle_buffer_info.render_indirect_offset]; + if effects_meta.trail_buffer_present { + buffer_indirect_offsets + .push(particle_buffer_info.trail_render_indirect_offset); + } compute_pass.set_bind_group( 3, - effects_meta - .update_render_indirect_bind_group - .as_ref() - .unwrap(), - &[render_indirect_offset], + update_render_indirect_bind_group, + &buffer_indirect_offsets, ); if let Some(buffer) = effects_meta.dispatch_indirect_buffer.buffer() { trace!( "dispatch_workgroups_indirect: buffer={:?} offset={}", buffer, - dispatch_indirect_offset + particle_buffer_info.dispatch_indirect_offset + ); + compute_pass.dispatch_workgroups_indirect( + buffer, + particle_buffer_info.dispatch_indirect_offset, ); - compute_pass.dispatch_workgroups_indirect(buffer, dispatch_indirect_offset); // TODO - offset } @@ -3160,6 +3297,313 @@ impl Node for 
VfxSimulateNode { } } +struct ParticleBufferInfo { + workgroup_count: u32, + spawner_base: u32, + buffer_offset: u32, + spawner_buffer_aligned: usize, + spawner_offset: u32, + dispatch_indirect_offset: u64, + render_indirect_offset: u32, + trail_render_indirect_offset: u32, +} + +impl ParticleBufferInfo { + fn new( + batch: &EffectBatch, + effects_meta: &EffectsMeta, + num_batches: u32, + pipeline_name: &str, + ) -> ParticleBufferInfo { + const WORKGROUP_SIZE: u32 = 64; + + let spawn_count = batch.spawn_count; + + let workgroup_count = (num_batches + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE; + + let item_size = batch.particle_layout.min_binding_size(); + let item_count = batch.slice.end - batch.slice.start; + + let spawner_base = batch.spawner_base; + let buffer_offset = batch.slice.start; + + let spawner_buffer_aligned = effects_meta.spawner_buffer.aligned_size(); + assert!(spawner_buffer_aligned >= GpuSpawnerParams::min_size().get() as usize); + let spawner_offset = spawner_base * spawner_buffer_aligned as u32; + + let dispatch_indirect_offset = + batch.buffer_index as u64 * effects_meta.dispatch_indirect_buffer.aligned_size() as u64; + + let render_indirect_offset = + batch.buffer_index * effects_meta.render_dispatch_buffer.aligned_size() as u32; + + let trail_render_indirect_offset = + batch.buffer_index * effects_meta.trail_render_dispatch_buffer.aligned_size() as u32; + + trace!( + "record commands for {} of effect {:?} ({} items / {}B/item) \ +(spawn {} = {} workgroups) spawner_base={} spawner_offset={} buffer_offset={} \ +dispatch_indirect_offset={} render_indirect_offset={} trail_render_indirect_offset={}...", + pipeline_name, + batch.handle, + item_count, + item_size, + spawn_count, + workgroup_count, + spawner_base, + spawner_offset, + buffer_offset, + dispatch_indirect_offset, + render_indirect_offset, + trail_render_indirect_offset, + ); + + ParticleBufferInfo { + workgroup_count, + spawner_base, + buffer_offset, + spawner_buffer_aligned, + spawner_offset, 
+ dispatch_indirect_offset, + render_indirect_offset, + trail_render_indirect_offset, + } + } +} + +fn particle_bind_group_layout_entries( + particle_min_binding_size: NonZeroU64, + property_min_binding_size: Option, + trail_buffer_present: bool, +) -> Vec { + let mut entries = vec![ + BindGroupLayoutEntry { + binding: 0, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: Some(particle_min_binding_size), + }, + count: None, + }, + BindGroupLayoutEntry { + binding: 1, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: BufferSize::new(12), + }, + count: None, + }, + ]; + + if let Some(property_min_binding_size) = property_min_binding_size { + entries.push(BindGroupLayoutEntry { + binding: 2, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: true }, + has_dynamic_offset: false, // TODO + min_binding_size: Some(property_min_binding_size), + }, + count: None, + }); + } + + if trail_buffer_present { + entries.push(BindGroupLayoutEntry { + binding: 3, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: true, + min_binding_size: Some(particle_min_binding_size), + }, + count: None, + }); + } + + entries +} + +// Creates one of the compute pipelines to run the `vfx_indirect` dispatch +// workgroup calculation shader. +// +// This is called twice: once for the compute pipeline without trails and once +// for the compute pipeline with trails. 
+fn create_dispatch_indirect_pipeline( + world: &mut World, + trails: bool, +) -> (BindGroupLayout, ComputePipeline) { + let world = world.cell(); + let render_device = world.get_resource::().unwrap(); + + // The GpuSpawnerParams is bound as an array element or as a standalone struct, + // so needs the proper align. Because WGSL removed the @stride attribute, we pad + // the WGSL type manually, so need to enforce min_binding_size everywhere. + let item_align = render_device.limits().min_storage_buffer_offset_alignment as usize; + let spawner_aligned_size = GpuSpawnerParams::aligned_size(item_align); + trace!( + "Aligning spawner params to {} bytes as device limits requires. Size: {} bytes.", + item_align, + spawner_aligned_size + ); + + trace!( + "GpuRenderIndirect: min_size={} | GpuDispatchIndirect: min_size={}", + GpuRenderIndirect::min_size(), + GpuDispatchIndirect::min_size() + ); + let mut entries = vec![ + BindGroupLayoutEntry { + binding: 0, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: Some(GpuRenderIndirect::min_size()), + }, + count: None, + }, + BindGroupLayoutEntry { + binding: 1, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: Some(GpuDispatchIndirect::min_size()), + }, + count: None, + }, + BindGroupLayoutEntry { + binding: 2, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: true }, + has_dynamic_offset: false, + min_binding_size: Some(NonZeroU64::new(spawner_aligned_size as u64).unwrap()), + }, + count: None, + }, + ]; + if trails { + entries.push(BindGroupLayoutEntry { + binding: 3, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: 
Some(GpuTrailRenderIndirect::min_size()), + }, + count: None, + }); + entries.push(BindGroupLayoutEntry { + binding: 4, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: Some( + u32::min_size().max((item_align as u64).try_into().unwrap()), + ), + }, + count: None, + }); + } + let dispatch_indirect_layout = render_device.create_bind_group_layout( + &*format!( + "hanabi:bind_group_layout:dispatch_indirect_dispatch_indirect_{}", + if trails { "trails" } else { "no_trails" } + ), + &entries, + ); + + trace!("GpuSimParams: min_size={}", GpuSimParams::min_size()); + let sim_params_layout = render_device.create_bind_group_layout( + "hanabi:bind_group_layout:dispatch_indirect_sim_params", + &[BindGroupLayoutEntry { + binding: 0, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: Some(GpuSimParams::min_size()), + }, + count: None, + }], + ); + + let pipeline_layout = render_device.create_pipeline_layout(&PipelineLayoutDescriptor { + label: Some("hanabi:pipeline_layout:dispatch_indirect"), + bind_group_layouts: &[&dispatch_indirect_layout, &sim_params_layout], + push_constant_ranges: &[], + }); + + // We need to pad the Spawner WGSL struct based on the device padding so that we + // can use it as an array element but also has a direct struct binding. + let spawner_padding_code = GpuSpawnerParams::padding_code(item_align); + let indirect_code = + include_str!("vfx_indirect.wgsl").replace("{{SPAWNER_PADDING}}", &spawner_padding_code); + + // Resolve imports. Because we don't insert this shader into Bevy' pipeline + // cache, we don't get that part "for free", so we have to do it manually here. 
+ let indirect_naga_module = { + let mut composer = Composer::default(); + + // Import bevy_hanabi::vfx_common + { + let common_shader = HanabiPlugin::make_common_shader(item_align); + let mut desc: naga_oil::compose::ComposableModuleDescriptor<'_> = + (&common_shader).into(); + desc.shader_defs.insert( + "SPAWNER_PADDING".to_string(), + naga_oil::compose::ShaderDefValue::Bool(true), + ); + let res = composer.add_composable_module(desc); + assert!(res.is_ok()); + } + + let mut shader_defs = std::collections::HashMap::new(); + if trails { + shader_defs.insert( + "TRAILS".to_string(), + naga_oil::compose::ShaderDefValue::Bool(true), + ); + } + + match composer.make_naga_module(NagaModuleDescriptor { + source: &indirect_code, + file_path: "vfx_indirect.wgsl", + shader_defs, + ..Default::default() + }) { + Ok(naga_module) => ShaderSource::Naga(Cow::Owned(naga_module)), + Err(compose_error) => panic!( + "Failed to compose vfx_indirect.wgsl, naga_oil returned: {}", + compose_error.emit_to_string(&composer) + ), + } + }; + + debug!("Create indirect dispatch shader:\n{}", indirect_code); + + let shader_module = render_device.create_shader_module(ShaderModuleDescriptor { + label: Some("hanabi:vfx_indirect_shader"), + source: indirect_naga_module, + }); + + let pipeline = render_device.create_compute_pipeline(&RawComputePipelineDescriptor { + label: Some("hanabi:compute_pipeline:dispatch_indirect"), + layout: Some(&pipeline_layout), + module: &shader_module, + entry_point: "main", + }); + + (dispatch_indirect_layout, pipeline) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/render/vfx_common.wgsl b/src/render/vfx_common.wgsl index f9e789c2..04b56fcc 100644 --- a/src/render/vfx_common.wgsl +++ b/src/render/vfx_common.wgsl @@ -14,6 +14,9 @@ struct SimParams { /// Stride in bytes of the DispatchIndirect struct. Used to calculate /// the position of each effect's data into the buffer of a batch. 
dispatch_stride: u32, + /// Stride in bytes of the TrailRenderIndirect struct. Used to calculate + /// the position of each effect's data into the buffer of a batch. + trail_render_stride: u32, //#endif } @@ -24,6 +27,10 @@ struct Spawner { seed: u32, count: atomic, effect_index: u32, + spawn_trail_particle: u32, + trail_capacity: u32, + trail_head_chunk: u32, + trail_tail_chunk: u32, #ifdef SPAWNER_PADDING {{SPAWNER_PADDING}} #endif @@ -109,6 +116,18 @@ struct RenderIndirect { max_update: u32, } +const TRI_OFFSET_VERTEX_COUNT: u32 = 0u; +const TRI_OFFSET_INSTANCE_COUNT: u32 = 1u; +const TRI_OFFSET_BASE_INDEX: u32 = 2u; +const TRI_OFFSET_BASE_INSTANCE: u32 = 3u; + +struct TrailRenderIndirect { + vertex_count: u32, + instance_count: atomic, + base_index: u32, + base_instance: u32, +} + var seed : u32 = 0u; const tau: f32 = 6.283185307179586476925286766559; diff --git a/src/render/vfx_indirect.wgsl b/src/render/vfx_indirect.wgsl index 3e2c5653..421c13e5 100644 --- a/src/render/vfx_indirect.wgsl +++ b/src/render/vfx_indirect.wgsl @@ -2,7 +2,8 @@ SimParams, Spawner, DI_OFFSET_X, DI_OFFSET_PONG, RI_OFFSET_ALIVE_COUNT, RI_OFFSET_MAX_UPDATE, RI_OFFSET_DEAD_COUNT, - RI_OFFSET_MAX_SPAWN, RI_OFFSET_INSTANCE_COUNT, RI_OFFSET_PING + RI_OFFSET_MAX_SPAWN, RI_OFFSET_INSTANCE_COUNT, RI_OFFSET_PING, + TRI_OFFSET_BASE_INSTANCE, TRI_OFFSET_INSTANCE_COUNT, TRI_OFFSET_TRAIL_INDICES } struct SpawnerBuffer { @@ -12,6 +13,10 @@ struct SpawnerBuffer { @group(0) @binding(0) var render_indirect_buffer : array; @group(0) @binding(1) var dispatch_indirect_buffer : array; @group(0) @binding(2) var spawner_buffer : SpawnerBuffer; +#ifdef TRAILS +@group(0) @binding(3) var trail_render_indirect_buffer : array; +@group(0) @binding(4) var trail_chunk_buffer : array; +#endif @group(1) @binding(0) var sim_params : SimParams; /// Calculate the indirect workgroups counts based on the number of particles alive. 
@@ -36,6 +41,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { // dispatch indirect arrays. let ri_base = sim_params.render_stride * effect_index / 4u; let di_base = sim_params.dispatch_stride * effect_index / 4u; + let tri_base = sim_params.trail_render_stride * effect_index / 4u; // Calculate the number of thread groups to dispatch for the update pass, which is // the number of alive particles rounded up to 64 (workgroup_size). @@ -58,6 +64,27 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { // with the particles actually alive at the end of their update (after aged). render_indirect_buffer[ri_base + RI_OFFSET_INSTANCE_COUNT] = 0u; +#ifdef TRAILS + // If needed, spawn a trail particle. + if (spawner_buffer.spawners[index].spawn_trail_particle != 0) { + // Get the trail head and tail chunks. + let head_chunk_index = spawner_buffer.spawners[index].trail_head_chunk; + let tail_chunk_index = spawner_buffer.spawners[index].trail_tail_chunk; + + // The previous tick's final instance becomes the current head. + let last_base_instance = trail_render_indirect_buffer[tri_base + TRI_OFFSET_BASE_INSTANCE]; + let last_instance_count = trail_render_indirect_buffer[tri_base + TRI_OFFSET_INSTANCE_COUNT]; + let last_instance_index = last_base_instance + last_instance_count; + trail_chunk_buffer[head_chunk_index] = last_instance_index; + + // Calculate the new base count and instance index. 
+ let tail_trail_index = trail_chunk_buffer[tail_chunk_index]; + trail_render_indirect_buffer[tri_base + TRI_OFFSET_BASE_INSTANCE] = u32(tail_trail_index); + trail_render_indirect_buffer[tri_base + TRI_OFFSET_INSTANCE_COUNT] = + last_instance_index - u32(tail_trail_index); + } +#endif + // Swap ping/pong buffers let ping = render_indirect_buffer[ri_base + RI_OFFSET_PING]; let pong = 1u - ping; diff --git a/src/render/vfx_render.wgsl b/src/render/vfx_render.wgsl index f593bb92..dd42b238 100644 --- a/src/render/vfx_render.wgsl +++ b/src/render/vfx_render.wgsl @@ -1,6 +1,6 @@ #import bevy_render::view::View #import bevy_hanabi::vfx_common::{ - DispatchIndirect, IndirectBuffer, SimParams, Spawner, + DispatchIndirect, IndirectBuffer, RenderIndirect, SimParams, Spawner, seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4, rand_uniform, proj } @@ -117,9 +117,18 @@ fn vertex( // @location(1) vertex_color: u32, // @location(1) vertex_velocity: vec3, ) -> VertexOutput { + // Is this a head particle or a trail particle? +#ifdef TRAILS + // Trail particle. + let particle_index = instance_index % arrayLength(&particle_buffer.particles); + let particle = particle_buffer.particles[particle_index]; +#else + // Head particle. 
let pong = dispatch_indirect.pong; let index = indirect_buffer.indices[3u * instance_index + pong]; - var particle = particle_buffer.particles[index]; + let particle = particle_buffer.particles[index]; +#endif + var out: VertexOutput; #ifdef PARTICLE_TEXTURE var uv = vertex_uv; diff --git a/src/render/vfx_update.wgsl b/src/render/vfx_update.wgsl index e24db8f2..8469d0fc 100644 --- a/src/render/vfx_update.wgsl +++ b/src/render/vfx_update.wgsl @@ -1,5 +1,5 @@ #import bevy_hanabi::vfx_common::{ - IndirectBuffer, RenderIndirect, SimParams, Spawner, + IndirectBuffer, RenderIndirect, TrailRenderIndirect, SimParams, Spawner, seed, tau, pcg_hash, to_float01, frand, frand2, frand3, frand4, rand_uniform, proj } @@ -18,8 +18,10 @@ struct ParticleBuffer { @group(1) @binding(0) var particle_buffer : ParticleBuffer; @group(1) @binding(1) var indirect_buffer : IndirectBuffer; {{PROPERTIES_BINDING}} +{{TRAIL_BINDING}} @group(2) @binding(0) var spawner : Spawner; // NOTE - same group as init @group(3) @binding(0) var render_indirect : RenderIndirect; +{{TRAIL_RENDER_INDIRECT_BINDING}} {{UPDATE_EXTRA}} @@ -66,5 +68,20 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { // Increment alive particle count and write indirection index for later rendering let indirect_index = atomicAdd(&render_indirect.instance_count, 1u); indirect_buffer.indices[3u * indirect_index + ping] = index; + +#ifdef TRAILS + // If we're alive and due to spawn a trail particle, do that now. + // + // Note that we only adjust the instance count here, to reduce the + // number of atomic operations we need to do. *Next* frame, + // `vfx_indirect.wgsl` will take care of computing the value to write + // into the chunk buffer by examining the final instance count from + // *this* frame. 
+ if (spawner.spawn_trail_particle != 0) { + let dest_index = trail_render_indirect.base_instance + + atomicAdd(&trail_render_indirect.instance_count, 1u); + trail_buffer.particles[dest_index % spawner.trail_capacity] = particle; + } +#endif } } diff --git a/src/spawn.rs b/src/spawn.rs index f2d9c2f1..cc898f60 100644 --- a/src/spawn.rs +++ b/src/spawn.rs @@ -178,6 +178,13 @@ pub struct Spawner { /// spawner becomes active. If `false`, the spawner doesn't do anything /// until [`EffectSpawner::reset()`] is called. starts_immediately: bool, + + /// The length of each trail, in particles. + trail_length: u32, + + /// Time that elapses before a trail particle is spawned, in seconds. If + /// this is zero, then a trail particle is spawned every update. + trail_period: CpuValue, } impl Default for Spawner { @@ -246,6 +253,8 @@ impl Spawner { period, starts_active: true, starts_immediately: true, + trail_period: CpuValue::Single(0.0), + trail_length: 0, } } @@ -358,6 +367,61 @@ impl Spawner { pub fn starts_active(&self) -> bool { self.starts_active } + + /// Sets the number of particles that make up each trail. + /// + /// The trail length is the same for all particles. This value doesn't + /// include the head particle. + pub fn set_trail_length(&mut self, trail_length: u32) { + self.trail_length = trail_length; + } + + /// The number of particles that make up each trail. + /// + /// The trail length is the same for all particles. This value doesn't + /// include the head particle. + pub fn trail_length(&self) -> u32 { + self.trail_length + } + + /// Sets the number of particles that make up each trail, using the builder + /// pattern. + /// + /// The trail length is the same for all particles. This value doesn't + /// include the head particle. + pub fn with_trail_length(mut self, trail_length: u32) -> Self { + self.trail_length = trail_length; + self + } + + /// Sets the amount of time between emission of trail particles. 
+ /// + /// After this much time elapses, a new trail particle is spawned. + /// + /// This value is the same for all particles. + pub fn set_trail_period(&mut self, trail_period: CpuValue) { + self.trail_period = trail_period; + } + + /// The amount of time between emission of trail particles. + /// + /// After this much time elapses, a new trail particle is spawned. + /// + /// This value is the same for all particles. + pub fn trail_period(&self) -> CpuValue { + self.trail_period + } + + /// Sets the amount of time between emission of trail particles, using the + /// builder pattern. + /// + /// After this much time elapses, a new trail particle is spawned. + /// + /// This value is the same for all particles. + pub fn with_trail_period(mut self, trail_period: CpuValue) -> Self { + self.trail_period = trail_period; + self + } } /// Runtime component maintaining the state of the spawner for an effect. @@ -390,8 +454,20 @@ pub struct EffectSpawner { /// Fractional remainder of particle count to spawn. spawn_remainder: f32, + /// Accumulated time since last trail spawn. + trail_time: f32, + + /// Sampled value of the spawner's `trail_period`. + curr_trail_spawn_time: f32, + + /// Number of times a trail particle spawn event has occurred. + trail_ticks: u32, + /// Whether the system is active. Defaults to `true`. active: bool, + + /// Whether we should spawn a trail particle, as calculated by the last [`tick()`] call. + spawn_trail_particle: bool, } impl EffectSpawner { @@ -412,7 +488,11 @@ impl EffectSpawner { limit: 0., spawn_count: 0, spawn_remainder: 0., + trail_time: 0.0, + curr_trail_spawn_time: 0.0, + trail_ticks: 0, active: spawner.starts_active(), + spawn_trail_particle: false, } } @@ -521,6 +601,28 @@ impl EffectSpawner { self.spawn_remainder -= count; self.spawn_count = count as u32; + // Handle trails. 
+ + self.trail_time += dt; + self.spawn_trail_particle = self.trail_time >= self.curr_trail_spawn_time; + + if self.spawn_trail_particle { + // 0 spawn time means "spawn a trail particle every frame". Handle + // this case. + if self.curr_trail_spawn_time == 0.0 { + self.trail_time = 0.0; + } else { + self.trail_time %= self.curr_trail_spawn_time; + } + + self.resample_trail_spawn_time(rng); + + self.trail_ticks += 1; + if self.trail_ticks == self.spawner.trail_length + 1 { + self.trail_ticks = 0; + } + } + self.spawn_count } @@ -536,11 +638,35 @@ impl EffectSpawner { self.spawn_count } + /// The number of trail particle spawn events that have occurred for this + /// particle effect. + /// + /// This counter is incremented on each trail spawn event, and wraps back + /// to zero once it reaches `trail_length + 1`. + #[inline] + pub fn trail_tick(&self) -> u32 { + self.trail_ticks + } + + /// Whether a chunk of trail particles is to be spawned this frame. + /// + /// This occurs at the set trail period. + #[inline] + pub fn spawn_trail_particle(&self) -> bool { + self.spawn_trail_particle + } + /// Resamples the spawn time and period. fn resample(&mut self, rng: &mut Pcg32) { self.limit = self.spawner.period.sample(rng); self.curr_spawn_time = self.spawner.spawn_time.sample(rng).clamp(0.0, self.limit); } + + /// Resamples the trail period, and recalculates the trail spawn time based + /// on it. + fn resample_trail_spawn_time(&mut self, rng: &mut Pcg32) { + self.curr_trail_spawn_time = self.spawner.trail_period.sample(rng); + } } /// Tick all the spawners of the visible [`ParticleEffect`] components.